Use enum for target_register_timer_callback()
[openocd.git] / src / target / aarch64.c
index 78d19731e757a65790fd6d5f736dbca5de7f8c4f..c9cedea82c0504e85cbc19e04fa3dbf7acdcf986 100644 (file)
 #include "register.h"
 #include "target_request.h"
 #include "target_type.h"
-#include "arm_opcodes.h"
+#include "armv8_opcodes.h"
+#include "armv8_cache.h"
+#include "arm_semihosting.h"
 #include <helper/time_support.h>
 
+enum restart_mode {
+       RESTART_LAZY,
+       RESTART_SYNC,
+};
+
+enum halt_mode {
+       HALT_LAZY,
+       HALT_SYNC,
+};
+
+struct aarch64_private_config {
+       struct adiv5_private_config adiv5_config;
+       struct arm_cti *cti;
+};
+
 static int aarch64_poll(struct target *target);
 static int aarch64_debug_entry(struct target *target);
 static int aarch64_restore_context(struct target *target, bool bpwp);
@@ -43,35 +60,69 @@ static int aarch64_unset_breakpoint(struct target *target,
 static int aarch64_mmu(struct target *target, int *enabled);
 static int aarch64_virt2phys(struct target *target,
        target_addr_t virt, target_addr_t *phys);
-static int aarch64_read_apb_ab_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        uint64_t address, uint32_t size, uint32_t count, uint8_t *buffer);
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data);
+
+#define foreach_smp_target(pos, head) \
+       for (pos = head; (pos != NULL); pos = pos->next)
 
 static int aarch64_restore_system_control_reg(struct target *target)
 {
+       enum arm_mode target_mode = ARM_MODE_ANY;
        int retval = ERROR_OK;
+       uint32_t instr;
 
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
 
        if (aarch64->system_control_reg != aarch64->system_control_reg_curr) {
                aarch64->system_control_reg_curr = aarch64->system_control_reg;
-               retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                    0xd5181000,
-                                                    aarch64->system_control_reg);
+               /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_v8->cp15_control_reg); */
+
+               switch (armv8->arm.core_mode) {
+               case ARMV8_64_EL0T:
+                       target_mode = ARMV8_64_EL1H;
+                       /* fall through */
+               case ARMV8_64_EL1T:
+               case ARMV8_64_EL1H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+                       break;
+               case ARMV8_64_EL2T:
+               case ARMV8_64_EL2H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
+                       break;
+               case ARMV8_64_EL3H:
+               case ARMV8_64_EL3T:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
+                       break;
+
+               case ARM_MODE_SVC:
+               case ARM_MODE_ABT:
+               case ARM_MODE_FIQ:
+               case ARM_MODE_IRQ:
+               case ARM_MODE_SYS:
+                       instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
+                       break;
+
+               default:
+                       LOG_INFO("cannot read system control register in this mode");
+                       return ERROR_FAIL;
+               }
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+               retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr, aarch64->system_control_reg);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
        }
 
        return retval;
 }
 
-/*  check address before aarch64_apb read write access with mmu on
- *  remove apb predictible data abort */
-static int aarch64_check_address(struct target *target, uint32_t address)
-{
-       /* TODO */
-       return ERROR_OK;
-}
 /*  modify system_control_reg in order to enable or disable mmu for :
  *  - virt2phys address conversion
  *  - read or write memory in phys or virt address */
@@ -80,34 +131,59 @@ static int aarch64_mmu_modify(struct target *target, int enable)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval = ERROR_OK;
+       uint32_t instr = 0;
 
        if (enable) {
-               /*  if mmu enabled at target stop and mmu not enable */
+               /* the MMU can only be re-enabled if it was enabled when the target stopped */
                if (!(aarch64->system_control_reg & 0x1U)) {
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
-               if (!(aarch64->system_control_reg_curr & 0x1U)) {
+               if (!(aarch64->system_control_reg_curr & 0x1U))
                        aarch64->system_control_reg_curr |= 0x1U;
-                       retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                            0xd5181000,
-                                                            aarch64->system_control_reg_curr);
-               }
        } else {
                if (aarch64->system_control_reg_curr & 0x4U) {
                        /*  data cache is active */
                        aarch64->system_control_reg_curr &= ~0x4U;
-                       /* flush data cache armv7 function to be called */
+                       /* flush the data cache through the armv8 cache hook, if one is installed */
                        if (armv8->armv8_mmu.armv8_cache.flush_all_data_cache)
                                armv8->armv8_mmu.armv8_cache.flush_all_data_cache(target);
                }
                if ((aarch64->system_control_reg_curr & 0x1U)) {
                        aarch64->system_control_reg_curr &= ~0x1U;
-                       retval = aarch64_instr_write_data_r0(armv8->arm.dpm,
-                                                            0xd5181000,
-                                                            aarch64->system_control_reg_curr);
                }
        }
+
+       switch (armv8->arm.core_mode) {
+       case ARMV8_64_EL0T:
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+               break;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
+               break;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
+               break;
+
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+       case ARM_MODE_SYS:
+               instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
+               break;
+
+       default:
+               LOG_DEBUG("unknown cpu state 0x%" PRIx32, armv8->arm.core_mode);
+               break;
+       }
+
+       retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr,
+                               aarch64->system_control_reg_curr);
        return retval;
 }
 
@@ -120,89 +196,46 @@ static int aarch64_init_debug_access(struct target *target)
        int retval;
        uint32_t dummy;
 
-       LOG_DEBUG(" ");
+       LOG_DEBUG("%s", target_name(target));
 
-       /* Unlocking the debug registers for modification
-        * The debugport might be uninitialised so try twice */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
        if (retval != ERROR_OK) {
-               /* try again */
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                            armv8->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
-               if (retval == ERROR_OK)
-                       LOG_USER("Locking debug access failed on first, but succeeded on second try.");
-       }
-       if (retval != ERROR_OK)
+               LOG_DEBUG("Examine %s failed", "oslock");
                return retval;
+       }
+
        /* Clear Sticky Power Down status Bit in PRSR to enable access to
           the registers in the Core Power Domain */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_PRSR, &dummy);
+                       armv8->debug_base + CPUV8_DBG_PRSR, &dummy);
        if (retval != ERROR_OK)
                return retval;
 
-       /* Enabling of instruction execution in debug mode is done in debug_entry code */
-
-       /* Resync breakpoint registers */
-
-       /* Since this is likely called from init or reset, update target state information*/
-       return aarch64_poll(target);
-}
-
-/* To reduce needless round-trips, pass in a pointer to the current
- * DSCR value.  Initialize it to zero if you just need to know the
- * value on return from this function; or DSCR_INSTR_COMP if you
- * happen to know that no instruction is pending.
- */
-static int aarch64_exec_opcode(struct target *target,
-       uint32_t opcode, uint32_t *dscr_p)
-{
-       uint32_t dscr;
-       int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       dscr = dscr_p ? *dscr_p : 0;
-
-       LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
-
-       /* Wait for InstrCompl bit to be set */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_INSTR_COMP) == 0) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
+       /*
+        * Static CTI configuration:
+        * Channel 0 -> trigger outputs HALT request to PE
+        * Channel 1 -> trigger outputs Resume request to PE
+        * Gate all channel trigger events from entering the CTM
+        */
 
-       retval = mem_ap_write_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_ITR, opcode);
+       /* Enable CTI */
+       retval = arm_cti_enable(armv8->cti, true);
+       /* By default, gate all channel events to and from the CTM */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* output halt requests to PE on channel 0 event */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN0, CTI_CHNL(0));
+       /* output restart requests to PE on channel 1 event */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN1, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
-       then = timeval_ms();
-       do {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register");
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for aarch64_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       } while ((dscr & DSCR_INSTR_COMP) == 0);        /* Wait for InstrCompl bit to be set */
-
-       if (dscr_p)
-               *dscr_p = dscr;
+       /* Resync breakpoint registers */
 
-       return retval;
+       return ERROR_OK;
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
@@ -218,804 +251,586 @@ static int aarch64_dap_write_memap_register_u32(struct target *target,
        return retval;
 }
 
-/*
- * AARCH64 implementation of Debug Programmer's Model
- *
- * NOTE the invariant:  these routines return with DSCR_INSTR_COMP set,
- * so there's no need to poll for it before executing an instruction.
- *
- * NOTE that in several of these cases the "stall" mode might be useful.
- * It'd let us queue a few operations together... prepare/finish might
- * be the places to enable/disable that mode.
- */
-
-static inline struct aarch64_common *dpm_to_a8(struct arm_dpm *dpm)
+static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
 {
-       return container_of(dpm, struct aarch64_common, armv8_common.dpm);
-}
+       struct arm_dpm *dpm = &a8->armv8_common.dpm;
+       int retval;
 
-static int aarch64_write_dcc(struct aarch64_common *a8, uint32_t data)
-{
-       LOG_DEBUG("write DCC 0x%08" PRIx32, data);
-       return mem_ap_write_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DTRRX, data);
+       dpm->arm = &a8->armv8_common.arm;
+       dpm->didr = debug;
+
+       retval = armv8_dpm_setup(dpm);
+       if (retval == ERROR_OK)
+               retval = armv8_dpm_initialize(dpm);
+
+       return retval;
 }
 
-static int aarch64_write_dcc_64(struct aarch64_common *a8, uint64_t data)
+static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
 {
-       int ret;
-       LOG_DEBUG("write DCC 0x%08" PRIx32, (unsigned)data);
-       LOG_DEBUG("write DCC 0x%08" PRIx32, (unsigned)(data >> 32));
-       ret = mem_ap_write_u32(a8->armv8_common.debug_ap,
-                              a8->armv8_common.debug_base + CPUDBG_DTRRX, data);
-       ret += mem_ap_write_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DTRTX, data >> 32);
-       return ret;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       return armv8_set_dbgreg_bits(armv8, CPUV8_DBG_DSCR, bit_mask, value);
 }
 
-static int aarch64_read_dcc(struct aarch64_common *a8, uint32_t *data,
-       uint32_t *dscr_p)
+static int aarch64_check_state_one(struct target *target,
+               uint32_t mask, uint32_t val, int *p_result, uint32_t *p_prsr)
 {
-       uint32_t dscr = DSCR_INSTR_COMP;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       uint32_t prsr;
        int retval;
 
-       if (dscr_p)
-               dscr = *dscr_p;
-
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                                           a8->armv8_common.debug_base + CPUDBG_DTRTX,
-                                           data);
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_PRSR, &prsr);
        if (retval != ERROR_OK)
                return retval;
-       LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       if (p_prsr)
+               *p_prsr = prsr;
 
-       return retval;
+       if (p_result)
+               *p_result = (prsr & mask) == (val & mask);
+
+       return ERROR_OK;
 }
-static int aarch64_read_dcc_64(struct aarch64_common *a8, uint64_t *data,
-       uint32_t *dscr_p)
+
+static int aarch64_wait_halt_one(struct target *target)
 {
-       uint32_t dscr = DSCR_INSTR_COMP;
-       uint32_t higher;
-       int retval;
+       int retval = ERROR_OK;
+       uint32_t prsr;
 
-       if (dscr_p)
-               dscr = *dscr_p;
+       int64_t then = timeval_ms();
+       for (;;) {
+               int halted;
+
+               retval = aarch64_check_state_one(target, PRSR_HALT, PRSR_HALT, &halted, &prsr);
+               if (retval != ERROR_OK || halted)
+                       break;
 
-       /* Wait for DTRRXfull */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DSCR,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
                if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for read dcc");
-                       return ERROR_FAIL;
+                       retval = ERROR_TARGET_TIMEOUT;
+                       LOG_DEBUG("target %s timeout, prsr=0x%08"PRIx32, target_name(target), prsr);
+                       break;
                }
        }
+       return retval;
+}
 
-       retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                                           a8->armv8_common.debug_base + CPUDBG_DTRTX,
-                                           (uint32_t *)data);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                                           a8->armv8_common.debug_base + CPUDBG_DTRRX,
-                                           &higher);
-       if (retval != ERROR_OK)
-               return retval;
+static int aarch64_prepare_halt_smp(struct target *target, bool exc_target, struct target **p_first)
+{
+       int retval = ERROR_OK;
+       struct target_list *head = target->head;
+       struct target *first = NULL;
 
-       *data = *(uint32_t *)data | (uint64_t)higher << 32;
-       LOG_DEBUG("read DCC 0x%16.16" PRIx64, *data);
+       LOG_DEBUG("target %s exc %i", target_name(target), exc_target);
 
-       if (dscr_p)
-               *dscr_p = dscr;
+       while (head != NULL) {
+               struct target *curr = head->target;
+               struct armv8_common *armv8 = target_to_armv8(curr);
+               head = head->next;
 
-       return retval;
-}
+               if (exc_target && curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_RUNNING)
+                       continue;
 
-static int aarch64_dpm_prepare(struct arm_dpm *dpm)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr;
-       int retval;
+               /* HACK: mark this target as prepared for halting */
+               curr->debug_reason = DBG_REASON_DBGRQ;
 
-       /* set up invariant:  INSTR_COMP is set after ever DPM operation */
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(a8->armv8_common.debug_ap,
-                               a8->armv8_common.debug_base + CPUDBG_DSCR,
-                               &dscr);
+               /* open the gate for channel 0 to let HALT requests pass to the CTM */
+               retval = arm_cti_ungate_channel(armv8->cti, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_set_dscr_bits(curr, DSCR_HDE, DSCR_HDE);
                if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_INSTR_COMP) != 0)
                        break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for dpm prepare");
-                       return ERROR_FAIL;
-               }
+
+               LOG_DEBUG("target %s prepared", target_name(curr));
+
+               if (first == NULL)
+                       first = curr;
        }
 
-       /* this "should never happen" ... */
-       if (dscr & DSCR_DTR_RX_FULL) {
-               LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
-               /* Clear DCCRX */
-               retval = aarch64_exec_opcode(
-                               a8->armv8_common.arm.target,
-                               0xd5130400,
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
+       if (p_first) {
+               if (exc_target && first)
+                       *p_first = first;
+               else
+                       *p_first = target;
        }
 
        return retval;
 }
 
-static int aarch64_dpm_finish(struct arm_dpm *dpm)
+static int aarch64_halt_one(struct target *target, enum halt_mode mode)
 {
-       /* REVISIT what could be done here? */
-       return ERROR_OK;
-}
+       int retval = ERROR_OK;
+       struct armv8_common *armv8 = target_to_armv8(target);
 
-static int aarch64_instr_write_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
+       LOG_DEBUG("%s", target_name(target));
 
-       retval = aarch64_write_dcc(a8, data);
+       /* allow Halting Debug Mode */
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK)
                return retval;
 
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-}
-
-static int aarch64_instr_write_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
-
-       retval = aarch64_write_dcc_64(a8, data);
+       /* trigger an event on channel 0, this outputs a halt request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       return aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       if (mode == HALT_SYNC) {
+               retval = aarch64_wait_halt_one(target);
+               if (retval != ERROR_OK) {
+                       if (retval == ERROR_TARGET_TIMEOUT)
+                               LOG_ERROR("Timeout waiting for target %s halt", target_name(target));
+                       return retval;
+               }
+       }
+
+       return ERROR_OK;
 }
 
-static int aarch64_instr_write_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t data)
+static int aarch64_halt_smp(struct target *target, bool exc_target)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
+       struct target *next = target;
        int retval;
 
-       retval = aarch64_write_dcc(a8, data);
-       if (retval != ERROR_OK)
-               return retval;
+       /* prepare halt on all PEs of the group */
+       retval = aarch64_prepare_halt_smp(target, exc_target, &next);
 
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5330500,
-                       &dscr);
-       if (retval != ERROR_OK)
+       if (exc_target && next == target)
                return retval;
 
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-
-       return retval;
-}
-
-static int aarch64_instr_write_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
-
-       retval = aarch64_write_dcc_64(a8, data);
-       if (retval != ERROR_OK)
-               return retval;
+       /* halt the target PE */
+       if (retval == ERROR_OK)
+               retval = aarch64_halt_one(next, HALT_LAZY);
 
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5330400,
-                       &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       /* then the opcode, taking data from R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       /* wait for all PEs to halt */
+       int64_t then = timeval_ms();
+       for (;;) {
+               bool all_halted = true;
+               struct target_list *head;
+               struct target *curr;
 
-       return retval;
-}
-
-static int aarch64_instr_cpsr_sync(struct arm_dpm *dpm)
-{
-       struct target *target = dpm->arm->target;
-       uint32_t dscr = DSCR_INSTR_COMP;
+               foreach_smp_target(head, target->head) {
+                       int halted;
 
-       /* "Prefetch flush" after modifying execution status in CPSR */
-       return aarch64_exec_opcode(target,
-                       ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
-                       &dscr);
-}
+                       curr = head->target;
 
-static int aarch64_instr_read_data_dcc(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
+                       if (!target_was_examined(curr))
+                               continue;
 
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+                       retval = aarch64_check_state_one(curr, PRSR_HALT, PRSR_HALT, &halted, NULL);
+                       if (retval != ERROR_OK || !halted) {
+                               all_halted = false;
+                               break;
+                       }
+               }
 
-       return aarch64_read_dcc(a8, data, &dscr);
-}
+               if (all_halted)
+                       break;
 
-static int aarch64_instr_read_data_dcc_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       int retval;
-       uint32_t dscr = DSCR_INSTR_COMP;
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
 
-       /* the opcode, writing data to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTIs are not connected by a common
+                * trigger matrix. It seems that we need to halt one core in each
+                * cluster explicitly. So if we find a core that has not halted
+                * yet, we send an explicit halt request to that core as well.
+                */
+               retval = aarch64_halt_one(curr, HALT_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+       }
 
-       return aarch64_read_dcc_64(a8, data, &dscr);
+       return retval;
 }
 
-static int aarch64_instr_read_data_r0(struct arm_dpm *dpm,
-       uint32_t opcode, uint32_t *data)
+static int update_halt_gdb(struct target *target, enum target_debug_reason debug_reason)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
+       struct target *gdb_target = NULL;
+       struct target_list *head;
+       struct target *curr;
 
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       if (debug_reason == DBG_REASON_NOTHALTED) {
+               LOG_DEBUG("Halting remaining targets in SMP group");
+               aarch64_halt_smp(target, true);
+       }
 
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5130400,  /* msr dbgdtr_el0, x0 */
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       /* poll all targets in the group, but skip the target that serves GDB */
+       foreach_smp_target(head, target->head) {
+               curr = head->target;
+               /* skip calling context */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* skip targets that were already halted */
+               if (curr->state == TARGET_HALTED)
+                       continue;
+               /* remember the gdb_service->target */
+               if (curr->gdb_service != NULL)
+                       gdb_target = curr->gdb_service->target;
+               /* skip it */
+               if (curr == gdb_target)
+                       continue;
+
+               /* avoid recursion in aarch64_poll() */
+               curr->smp = 0;
+               aarch64_poll(curr);
+               curr->smp = 1;
+       }
+
+       /* after all targets were updated, poll the gdb serving target */
+       if (gdb_target != NULL && gdb_target != target)
+               aarch64_poll(gdb_target);
 
-       return aarch64_read_dcc(a8, data, &dscr);
+       return ERROR_OK;
 }
 
-static int aarch64_instr_read_data_r0_64(struct arm_dpm *dpm,
-       uint32_t opcode, uint64_t *data)
+/*
+ * Aarch64 Run control
+ */
+
+static int aarch64_poll(struct target *target)
 {
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t dscr = DSCR_INSTR_COMP;
-       int retval;
+       enum target_state prev_target_state;
+       int retval = ERROR_OK;
+       int halted;
 
-       /* the opcode, writing data to R0 */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       opcode,
-                       &dscr);
+       retval = aarch64_check_state_one(target,
+                               PRSR_HALT, PRSR_HALT, &halted, NULL);
        if (retval != ERROR_OK)
                return retval;
 
-       /* write R0 to DCC */
-       retval = aarch64_exec_opcode(
-                       a8->armv8_common.arm.target,
-                       0xd5130400,  /* msr dbgdtr_el0, x0 */
-                       &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+       if (halted) {
+               prev_target_state = target->state;
+               if (prev_target_state != TARGET_HALTED) {
+                       enum target_debug_reason debug_reason = target->debug_reason;
 
-       return aarch64_read_dcc_64(a8, data, &dscr);
-}
+                       /* We have a halting debug event */
+                       target->state = TARGET_HALTED;
+                       LOG_DEBUG("Target %s halted", target_name(target));
+                       retval = aarch64_debug_entry(target);
+                       if (retval != ERROR_OK)
+                               return retval;
 
-static int aarch64_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
-       uint32_t addr, uint32_t control)
-{
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t vr = a8->armv8_common.debug_base;
-       uint32_t cr = a8->armv8_common.debug_base;
-       int retval;
+                       if (target->smp)
+                               update_halt_gdb(target, debug_reason);
 
-       switch (index_t) {
-               case 0 ... 15:  /* breakpoints */
-                       vr += CPUDBG_BVR_BASE;
-                       cr += CPUDBG_BCR_BASE;
-                       break;
-               case 16 ... 31: /* watchpoints */
-                       vr += CPUDBG_WVR_BASE;
-                       cr += CPUDBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       vr += 4 * index_t;
-       cr += 4 * index_t;
+                       if (arm_semihosting(target, &retval) != 0)
+                               return retval;
 
-       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
-               (unsigned) vr, (unsigned) cr);
+                       switch (prev_target_state) {
+                       case TARGET_RUNNING:
+                       case TARGET_UNKNOWN:
+                       case TARGET_RESET:
+                               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
+                               break;
+                       case TARGET_DEBUG_RUNNING:
+                               target_call_event_callbacks(target, TARGET_EVENT_DEBUG_HALTED);
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       } else
+               target->state = TARGET_RUNNING;
 
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       vr, addr);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_dap_write_memap_register_u32(dpm->arm->target,
-                       cr, control);
        return retval;
 }
 
-static int aarch64_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
+static int aarch64_halt(struct target *target)
 {
-       return ERROR_OK;
-
-#if 0
-       struct aarch64_common *a8 = dpm_to_a8(dpm);
-       uint32_t cr;
-
-       switch (index_t) {
-               case 0 ... 15:
-                       cr = a8->armv8_common.debug_base + CPUDBG_BCR_BASE;
-                       break;
-               case 16 ... 31:
-                       cr = a8->armv8_common.debug_base + CPUDBG_WCR_BASE;
-                       index_t -= 16;
-                       break;
-               default:
-                       return ERROR_FAIL;
-       }
-       cr += 4 * index_t;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_HALT;
 
-       LOG_DEBUG("A8: bpwp disable, cr %08x", (unsigned) cr);
+       if (target->smp)
+               return aarch64_halt_smp(target, false);
 
-       /* clear control register */
-       return aarch64_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
-#endif
+       return aarch64_halt_one(target, HALT_SYNC);
 }
 
-static int aarch64_dpm_setup(struct aarch64_common *a8, uint32_t debug)
+static int aarch64_restore_one(struct target *target, int current,
+       uint64_t *address, int handle_breakpoints, int debug_execution)
 {
-       struct arm_dpm *dpm = &a8->armv8_common.dpm;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
        int retval;
+       uint64_t resume_pc;
 
-       dpm->arm = &a8->armv8_common.arm;
-       dpm->didr = debug;
-
-       dpm->prepare = aarch64_dpm_prepare;
-       dpm->finish = aarch64_dpm_finish;
+       LOG_DEBUG("%s", target_name(target));
 
-       dpm->instr_write_data_dcc = aarch64_instr_write_data_dcc;
-       dpm->instr_write_data_dcc_64 = aarch64_instr_write_data_dcc_64;
-       dpm->instr_write_data_r0 = aarch64_instr_write_data_r0;
-       dpm->instr_write_data_r0_64 = aarch64_instr_write_data_r0_64;
-       dpm->instr_cpsr_sync = aarch64_instr_cpsr_sync;
+       if (!debug_execution)
+               target_free_all_working_areas(target);
 
-       dpm->instr_read_data_dcc = aarch64_instr_read_data_dcc;
-       dpm->instr_read_data_dcc_64 = aarch64_instr_read_data_dcc_64;
-       dpm->instr_read_data_r0 = aarch64_instr_read_data_r0;
-       dpm->instr_read_data_r0_64 = aarch64_instr_read_data_r0_64;
+       /* current = 1: continue on current pc, otherwise continue at <address> */
+       resume_pc = buf_get_u64(arm->pc->value, 0, 64);
+       if (!current)
+               resume_pc = *address;
+       else
+               *address = resume_pc;
 
-       dpm->arm_reg_current = armv8_reg_current;
-
-       dpm->bpwp_enable = aarch64_bpwp_enable;
-       dpm->bpwp_disable = aarch64_bpwp_disable;
-
-       retval = arm_dpm_setup(dpm);
-       if (retval == ERROR_OK)
-               retval = arm_dpm_initialize(dpm);
-
-       return retval;
-}
-static struct target *get_aarch64(struct target *target, int32_t coreid)
-{
-       struct target_list *head;
-       struct target *curr;
-
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr->coreid == coreid) && (curr->state == TARGET_HALTED))
-                       return curr;
-               head = head->next;
+       /* Make sure that the Armv7 gdb thumb fixups do not
+        * kill the return address
+        */
+       switch (arm->core_state) {
+               case ARM_STATE_ARM:
+                       resume_pc &= 0xFFFFFFFC;
+                       break;
+               case ARM_STATE_AARCH64:
+                       resume_pc &= 0xFFFFFFFFFFFFFFFC;
+                       break;
+               case ARM_STATE_THUMB:
+               case ARM_STATE_THUMB_EE:
+                       /* When the return address is loaded into PC
+                        * bit 0 must be 1 to stay in Thumb state
+                        */
+                       resume_pc |= 0x1;
+                       break;
+               case ARM_STATE_JAZELLE:
+                       LOG_ERROR("How do I resume into Jazelle state??");
+                       return ERROR_FAIL;
        }
-       return target;
-}
-static int aarch64_halt(struct target *target);
+       LOG_DEBUG("resume pc = 0x%016" PRIx64, resume_pc);
+       buf_set_u64(arm->pc->value, 0, 64, resume_pc);
+       arm->pc->dirty = 1;
+       arm->pc->valid = 1;
 
-static int aarch64_halt_smp(struct target *target)
-{
-       int retval = 0;
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_HALTED))
-                       retval += aarch64_halt(curr);
-               head = head->next;
-       }
-       return retval;
-}
+       /* call this now, before restoring context, because it uses cpu
+        * register r0 for restoring the system control register */
+       retval = aarch64_restore_system_control_reg(target);
+       if (retval == ERROR_OK)
+               retval = aarch64_restore_context(target, handle_breakpoints);
 
-static int update_halt_gdb(struct target *target)
-{
-       int retval = 0;
-       if (target->gdb_service && target->gdb_service->core[0] == -1) {
-               target->gdb_service->target = target;
-               target->gdb_service->core[0] = target->coreid;
-               retval += aarch64_halt_smp(target);
-       }
        return retval;
 }
 
-/*
- * Cortex-A8 Run control
+/**
+ * prepare single target for restart
+ *
+ *
  */
-
-static int aarch64_poll(struct target *target)
+static int aarch64_prepare_restart_one(struct target *target)
 {
-       int retval = ERROR_OK;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
        uint32_t dscr;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
-       struct armv8_common *armv8 = &aarch64->armv8_common;
-       enum target_state prev_target_state = target->state;
-       /*  toggle to another core is done by gdb as follow */
-       /*  maint packet J core_id */
-       /*  continue */
-       /*  the next polling trigger an halt event sent to gdb */
-       if ((target->state == TARGET_HALTED) && (target->smp) &&
-               (target->gdb_service) &&
-               (target->gdb_service->target == NULL)) {
-               target->gdb_service->target =
-                       get_aarch64(target, target->gdb_service->core[1]);
-               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-               return retval;
-       }
+       uint32_t tmp;
+
+       LOG_DEBUG("%s", target_name(target));
+
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
-       aarch64->cpudbg_dscr = dscr;
 
-       if (DSCR_RUN_MODE(dscr) == (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED)) {
-               if (prev_target_state != TARGET_HALTED) {
-                       /* We have a halting debug event */
-                       LOG_DEBUG("Target halted");
-                       target->state = TARGET_HALTED;
-                       if ((prev_target_state == TARGET_RUNNING)
-                               || (prev_target_state == TARGET_UNKNOWN)
-                               || (prev_target_state == TARGET_RESET)) {
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_HALTED);
-                       }
-                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
-                               LOG_DEBUG(" ");
-
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
+       if ((dscr & DSCR_ITE) == 0)
+               LOG_ERROR("DSCR.ITE must be set before leaving debug!");
+       if ((dscr & DSCR_ERR) != 0)
+               LOG_ERROR("DSCR.ERR must be cleared before leaving debug!");
 
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_DEBUG_HALTED);
-                       }
-               }
-       } else if (DSCR_RUN_MODE(dscr) == DSCR_CORE_RESTARTED)
-               target->state = TARGET_RUNNING;
-       else {
-               LOG_DEBUG("Unknown target state dscr = 0x%08" PRIx32, dscr);
-               target->state = TARGET_UNKNOWN;
+       /* acknowledge a pending CTI halt event */
+       retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
+       /*
+        * open the CTI gate for channel 1 so that the restart events
+        * get passed along to all PEs. Also close gate for channel 0
+        * to isolate the PE from halt events.
+        */
+       if (retval == ERROR_OK)
+               retval = arm_cti_ungate_channel(armv8->cti, 1);
+       if (retval == ERROR_OK)
+               retval = arm_cti_gate_channel(armv8->cti, 0);
+
+       /* make sure that DSCR.HDE is set */
+       if (retval == ERROR_OK) {
+               dscr |= DSCR_HDE;
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
+
+       if (retval == ERROR_OK) {
+               /* clear sticky bits in PRSR, SDR is now 0 */
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_PRSR, &tmp);
        }
 
        return retval;
 }
 
-static int aarch64_halt(struct target *target)
+static int aarch64_do_restart_one(struct target *target, enum restart_mode mode)
 {
-       int retval = ERROR_OK;
-       uint32_t dscr;
        struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0, 1);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x140, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x140, 6);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x140, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa0, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa0, 5);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa0, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa4, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa4, 2);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0xa4, &dscr);
+       LOG_DEBUG("%s", target_name(target));
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x20, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x20, 4);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x20, &dscr);
-
-       /*
-        * enter halting debug mode
-        */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+       /* trigger an event on channel 1, generates a restart request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 1);
        if (retval != ERROR_OK)
                return retval;
 
-#      /* STATUS */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x134, &dscr);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, &dscr);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, 1);
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, &dscr);
-
-
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_CORE_HALTED) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for halt");
-                       return ERROR_FAIL;
+       if (mode == RESTART_SYNC) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       int resumed;
+                       /*
+                        * if PRSR.SDR is set now, the target did restart, even
+                        * if it's now already halted again (e.g. due to breakpoint)
+                        */
+                       retval = aarch64_check_state_one(target,
+                                               PRSR_SDR, PRSR_SDR, &resumed, NULL);
+                       if (retval != ERROR_OK || resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: Timeout waiting for resume", target_name(target));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
                }
        }
 
-       target->debug_reason = DBG_REASON_DBGRQ;
+       if (retval != ERROR_OK)
+               return retval;
+
+       target->debug_reason = DBG_REASON_NOTHALTED;
+       target->state = TARGET_RUNNING;
 
        return ERROR_OK;
 }
 
-static int aarch64_internal_restore(struct target *target, int current,
-       uint64_t *address, int handle_breakpoints, int debug_execution)
+static int aarch64_restart_one(struct target *target, enum restart_mode mode)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct arm *arm = &armv8->arm;
        int retval;
-       uint64_t resume_pc;
 
-       if (!debug_execution)
-               target_free_all_working_areas(target);
+       LOG_DEBUG("%s", target_name(target));
 
-       /* current = 1: continue on current pc, otherwise continue at <address> */
-       resume_pc = buf_get_u64(arm->pc->value, 0, 64);
-       if (!current)
-               resume_pc = *address;
-       else
-               *address = resume_pc;
+       retval = aarch64_prepare_restart_one(target);
+       if (retval == ERROR_OK)
+               retval = aarch64_do_restart_one(target, mode);
 
-       /* Make sure that the Armv7 gdb thumb fixups does not
-        * kill the return address
-        */
-       switch (arm->core_state) {
-               case ARM_STATE_ARM:
-               case ARM_STATE_AARCH64:
-                       resume_pc &= 0xFFFFFFFFFFFFFFFC;
-                       break;
-               case ARM_STATE_THUMB:
-               case ARM_STATE_THUMB_EE:
-                       /* When the return address is loaded into PC
-                        * bit 0 must be 1 to stay in Thumb state
-                        */
-                       resume_pc |= 0x1;
-                       break;
-               case ARM_STATE_JAZELLE:
-                       LOG_ERROR("How do I resume into Jazelle state??");
-                       return ERROR_FAIL;
-       }
-       LOG_DEBUG("resume pc = 0x%16" PRIx64, resume_pc);
-       buf_set_u64(arm->pc->value, 0, 64, resume_pc);
-       arm->pc->dirty = 1;
-       arm->pc->valid = 1;
-#if 0
-       /* restore dpm_mode at system halt */
-       dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
-#endif
-       /* called it now before restoring context because it uses cpu
-        * register r0 for restoring system control register */
-       retval = aarch64_restore_system_control_reg(target);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_restore_context(target, handle_breakpoints);
-       if (retval != ERROR_OK)
-               return retval;
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
+       return retval;
+}
 
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
+/*
+ * prepare all but the current target for restart
+ */
+static int aarch64_prep_restart_smp(struct target *target, int handle_breakpoints, struct target **p_first)
+{
+       int retval = ERROR_OK;
+       struct target_list *head;
+       struct target *first = NULL;
+       uint64_t address;
 
-#if 0
-       /* the front-end may request us not to handle breakpoints */
-       if (handle_breakpoints) {
-               /* Single step past breakpoint at current address */
-               breakpoint = breakpoint_find(target, resume_pc);
-               if (breakpoint) {
-                       LOG_DEBUG("unset breakpoint at 0x%8.8x", breakpoint->address);
-                       cortex_m3_unset_breakpoint(target, breakpoint);
-                       cortex_m3_single_step_core(target);
-                       cortex_m3_set_breakpoint(target, breakpoint);
+       foreach_smp_target(head, target->head) {
+               struct target *curr = head->target;
+
+               /* skip calling target */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_HALTED)
+                       continue;
+
+               /*  resume at current address, not in step mode */
+               retval = aarch64_restore_one(curr, 1, &address, handle_breakpoints, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_prepare_restart_one(curr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("failed to restore target %s", target_name(curr));
+                       break;
                }
+               /* remember the first valid target in the group */
+               if (first == NULL)
+                       first = curr;
        }
-#endif
+
+       if (p_first)
+               *p_first = first;
 
        return retval;
 }
 
-static int aarch64_internal_restart(struct target *target)
-{
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct arm *arm = &armv8->arm;
-       int retval;
-       uint32_t dscr;
-       /*
-        * * Restart core and wait for it to be started.  Clear ITRen and sticky
-        * * exception flags: see ARMv7 ARM, C5.9.
-        *
-        * REVISIT: for single stepping, we probably want to
-        * disable IRQs by default, with optional override...
-        */
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
+static int aarch64_step_restart_smp(struct target *target)
+{
+       int retval = ERROR_OK;
+       struct target_list *head;
+       struct target *first = NULL;
 
-       if ((dscr & DSCR_INSTR_COMP) == 0)
-               LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
+       LOG_DEBUG("%s", target_name(target));
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, dscr & ~DSCR_ITR_EN);
+       retval = aarch64_prep_restart_smp(target, 0, &first);
        if (retval != ERROR_OK)
                return retval;
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DRCR, DRCR_RESTART |
-                       DRCR_CLEAR_EXCEPTIONS);
-       if (retval != ERROR_OK)
+       if (first != NULL)
+               retval = aarch64_do_restart_one(first, RESTART_LAZY);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("error restarting target %s", target_name(first));
                return retval;
+       }
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x10, 1);
-       if (retval != ERROR_OK)
-               return retval;
+       int64_t then = timeval_ms();
+       for (;;) {
+               struct target *curr = target;
+               bool all_resumed = true;
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x10000 + 0x1c, 2);
-       if (retval != ERROR_OK)
-               return retval;
+               foreach_smp_target(head, target->head) {
+                       uint32_t prsr;
+                       int resumed;
 
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCR_CORE_RESTARTED) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for resume");
-                       return ERROR_FAIL;
-               }
-       }
+                       curr = head->target;
 
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
+                       if (curr == target)
+                               continue;
 
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
+                       if (!target_was_examined(curr))
+                               continue;
 
-       return ERROR_OK;
-}
+                       retval = aarch64_check_state_one(curr,
+                                       PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                       if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                               all_resumed = false;
+                               break;
+                       }
 
-static int aarch64_restore_smp(struct target *target, int handle_breakpoints)
-{
-       int retval = 0;
-       struct target_list *head;
-       struct target *curr;
-       uint64_t address;
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_RUNNING)) {
-                       /*  resume current address , not in step mode */
-                       retval += aarch64_internal_restore(curr, 1, &address,
-                                       handle_breakpoints, 0);
-                       retval += aarch64_internal_restart(curr);
+                       if (curr->state != TARGET_RUNNING) {
+                               curr->state = TARGET_RUNNING;
+                               curr->debug_reason = DBG_REASON_NOTHALTED;
+                               target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                       }
                }
-               head = head->next;
 
-       }
+               if (all_resumed)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("%s: timeout waiting for target resume", __func__);
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTI's are not connected by a common
+                * trigger matrix. It seems that we need to resume one core in each
+                * cluster explicitly. So if we find that a core has not resumed
+                * yet, we trigger an explicit resume for the second cluster.
+                */
+               retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+}
+
        return retval;
 }
 
@@ -1025,33 +840,98 @@ static int aarch64_resume(struct target *target, int current,
        int retval = 0;
        uint64_t addr = address;
 
-       /* dummy resume for smp toggle in order to reduce gdb impact  */
-       if ((target->smp) && (target->gdb_service->core[1] != -1)) {
-               /*   simulate a start and halt of target */
-               target->gdb_service->target = NULL;
-               target->gdb_service->core[0] = target->gdb_service->core[1];
-               /*  fake resume at next poll we play the  target core[1], see poll*/
-               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               return 0;
-       }
-       aarch64_internal_restore(target, current, &addr, handle_breakpoints,
-                                debug_execution);
+       struct armv8_common *armv8 = target_to_armv8(target);
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_RESUME;
+
+       if (target->state != TARGET_HALTED)
+               return ERROR_TARGET_NOT_HALTED;
+
+       /*
+        * If this target is part of a SMP group, prepare the others
+        * targets for resuming. This involves restoring the complete
+        * target register context and setting up CTI gates to accept
+        * resume events from the trigger matrix.
+        */
        if (target->smp) {
-               target->gdb_service->core[0] = -1;
-               retval = aarch64_restore_smp(target, handle_breakpoints);
+               retval = aarch64_prep_restart_smp(target, handle_breakpoints, NULL);
                if (retval != ERROR_OK)
                        return retval;
        }
-       aarch64_internal_restart(target);
+
+       /* all targets prepared, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &addr, handle_breakpoints,
+                                debug_execution);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_SYNC);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target->smp) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       struct target *curr = target;
+                       struct target_list *head;
+                       bool all_resumed = true;
+
+                       foreach_smp_target(head, target->head) {
+                               uint32_t prsr;
+                               int resumed;
+
+                               curr = head->target;
+                               if (curr == target)
+                                       continue;
+                               if (!target_was_examined(curr))
+                                       continue;
+
+                               retval = aarch64_check_state_one(curr,
+                                               PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                               if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                                       all_resumed = false;
+                                       break;
+                               }
+
+                               if (curr->state != TARGET_RUNNING) {
+                                       curr->state = TARGET_RUNNING;
+                                       curr->debug_reason = DBG_REASON_NOTHALTED;
+                                       target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                               }
+                       }
+
+                       if (all_resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: timeout waiting for target %s to resume", __func__, target_name(curr));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
+
+                       /*
+                        * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                        * and it looks like the CTI's are not connected by a common
+                        * trigger matrix. It seems that we need to resume one core in each
+                        * cluster explicitly. So if we find that a core has not resumed
+                        * yet, we trigger an explicit resume for the second cluster.
+                        */
+                       retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+                       if (retval != ERROR_OK)
+                               break;
+               }
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       target->debug_reason = DBG_REASON_NOTHALTED;
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               LOG_DEBUG("target resumed at 0x%" PRIu64, addr);
+               LOG_DEBUG("target resumed at 0x%" PRIx64, addr);
        } else {
                target->state = TARGET_DEBUG_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_DEBUG_RESUMED);
-               LOG_DEBUG("target debug resumed at 0x%" PRIu64, addr);
+               LOG_DEBUG("target debug resumed at 0x%" PRIx64, addr);
        }
 
        return ERROR_OK;
@@ -1059,58 +939,68 @@ static int aarch64_resume(struct target *target, int current,
 
 static int aarch64_debug_entry(struct target *target)
 {
-       uint32_t dscr;
        int retval = ERROR_OK;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
-       uint32_t tmp;
+       struct arm_dpm *dpm = &armv8->dpm;
+       enum arm_state core_state;
+       uint32_t dscr;
 
-       LOG_DEBUG("dscr = 0x%08" PRIx32, aarch64->cpudbg_dscr);
+       /* Debug entry sequence, step 1: make sure to clear all sticky errors
+        * (write DRCR_CSE to DRCR), then read DSCR and acknowledge the CTI
+        * halt trigger. Each step only runs if the previous one succeeded. */
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       if (retval == ERROR_OK)
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval == ERROR_OK)
+               retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
 
-       /* REVISIT surely we should not re-read DSCR !! */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
-        * imprecise data aborts get discarded by issuing a Data
-        * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
-        */
+       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), dscr);
 
-       /* Enable the ITR execution once we are in debug mode */
-       dscr |= DSCR_ITR_EN;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, dscr);
+       dpm->dscr = dscr;       /* store DSCR in the DPM before querying the core state */
+       core_state = armv8_dpm_get_core_state(dpm);
+       armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
+       armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
+
+       /* close the CTI gate for all events (retval is still ERROR_OK here,
+        * the check only keeps the chained style used below) */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* discard async exceptions */
+       if (retval == ERROR_OK)
+               retval = dpm->instr_cpsr_sync(dpm);
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       arm_dpm_report_dscr(&armv8->dpm, aarch64->cpudbg_dscr);
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                                  armv8->debug_base + CPUDBG_DESR, &tmp);
-       if ((tmp & 0x7) == 0x4)
-               target->debug_reason = DBG_REASON_SINGLESTEP;
+       armv8_dpm_report_dscr(dpm, dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
-               uint32_t wfar;
+               uint32_t tmp;
+               uint64_t wfar = 0;
 
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_WFAR,
-                               &wfar);
+                               armv8->debug_base + CPUV8_DBG_WFAR1,
+                               &tmp);
                if (retval != ERROR_OK)
                        return retval;
-               arm_dpm_report_wfar(&armv8->dpm, wfar);
+               wfar = tmp;
+               wfar = (wfar << 32);    /* the WFAR1 word becomes the upper 32 bits */
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_WFAR0,
+                               &tmp);
+               if (retval != ERROR_OK)
+                       return retval;
+               wfar |= tmp;            /* the WFAR0 word supplies the lower 32 bits */
+               armv8_dpm_report_wfar(&armv8->dpm, wfar);
        }
 
-       retval = arm_dpm_read_current_registers_64(&armv8->dpm);
+       retval = armv8_dpm_read_current_registers(&armv8->dpm);
 
-       if (armv8->post_debug_entry) {
+       if (retval == ERROR_OK && armv8->post_debug_entry)      /* optional hook, skipped on error */
                retval = armv8->post_debug_entry(target);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
 
        return retval;
 }
@@ -1119,40 +1009,80 @@ static int aarch64_post_debug_entry(struct target *target)
 {
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct armv8_mmu_common *armv8_mmu = &armv8->armv8_mmu;
-       uint32_t sctlr_el1 = 0;
        int retval;
+       enum arm_mode target_mode = ARM_MODE_ANY;
+       uint32_t instr;
+
+       switch (armv8->arm.core_mode) {
+       case ARMV8_64_EL0T:
+               target_mode = ARMV8_64_EL1H;
+               /* fall through */
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL1, 0);
+               break;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL2, 0);
+               break;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL3, 0);
+               break;
+
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+       case ARM_MODE_SYS:
+               instr = ARMV4_5_MRC(15, 0, 0, 1, 0, 0);
+               break;
+
+       default:
+               LOG_INFO("cannot read system control register in this mode");
+               return ERROR_FAIL;
+       }
+
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, target_mode);
 
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DRCR, 1<<2);
-       retval = aarch64_instr_read_data_r0(armv8->arm.dpm,
-                                           0xd5381000, &sctlr_el1);
+       retval = armv8->dpm.instr_read_data_r0(&armv8->dpm, instr, &aarch64->system_control_reg);
        if (retval != ERROR_OK)
                return retval;
 
-       LOG_DEBUG("sctlr_el1 = %#8.8x", sctlr_el1);
-       aarch64->system_control_reg = sctlr_el1;
-       aarch64->system_control_reg_curr = sctlr_el1;
-       aarch64->curr_mode = armv8->arm.core_mode;
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
 
-       armv8_mmu->mmu_enabled = sctlr_el1 & 0x1U ? 1 : 0;
-       armv8_mmu->armv8_cache.d_u_cache_enabled = sctlr_el1 & 0x4U ? 1 : 0;
-       armv8_mmu->armv8_cache.i_cache_enabled = sctlr_el1 & 0x1000U ? 1 : 0;
+       LOG_DEBUG("System_register: %8.8" PRIx32, aarch64->system_control_reg);
+       aarch64->system_control_reg_curr = aarch64->system_control_reg;
 
-#if 0
-       if (armv8->armv8_mmu.armv8_cache.ctype == -1)
-               armv8_identify_cache(target);
-#endif
+       if (armv8->armv8_mmu.armv8_cache.info == -1) {
+               armv8_identify_cache(armv8);
+               armv8_read_mpidr(armv8);
+       }
 
+       armv8->armv8_mmu.mmu_enabled =
+                       (aarch64->system_control_reg & 0x1U) ? 1 : 0;
+       armv8->armv8_mmu.armv8_cache.d_u_cache_enabled =
+               (aarch64->system_control_reg & 0x4U) ? 1 : 0;
+       armv8->armv8_mmu.armv8_cache.i_cache_enabled =
+               (aarch64->system_control_reg & 0x1000U) ? 1 : 0;
        return ERROR_OK;
 }
 
+/*
+ * single-step a target
+ *
+ * Sets EDECR.SS for a hardware step (and, when isrmasking_mode is
+ * AARCH64_ISRMASK_ON, sets DSCR bits 23:22 to mask interrupts), restores
+ * and restarts the core, then waits up to 100 ms for it to halt again.
+ * For an SMP target with current == 1, channel 1 of this target's CTI is
+ * gated and the other targets of the group are restarted first.
+ * EDECR and the interrupt mask bits are restored before the final poll.
+ *
+ * Returns the result of aarch64_poll() on success, or an error code if the
+ * step setup, the restart, the forced halt after a timeout, or the
+ * restoration of EDECR/DSCR fails.
+ */
 static int aarch64_step(struct target *target, int current, target_addr_t address,
        int handle_breakpoints)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       int saved_retval = ERROR_OK;
        int retval;
-       uint32_t tmp;
+       uint32_t edecr;
+
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_STEP;
 
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
@@ -1160,58 +1090,116 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
        }
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DECR, &tmp);
-       if (retval != ERROR_OK)
-               return retval;
+                       armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
+       /* make sure EDECR.SS is not set when restoring the register */
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DECR, (tmp|0x4));
+       if (retval == ERROR_OK) {
+               edecr &= ~0x4;
+               /* set EDECR.SS to enter hardware step mode */
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       }
+       /* disable interrupts while stepping */
+       if (retval == ERROR_OK && aarch64->isrmasking_mode == AARCH64_ISRMASK_ON)
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
+       /* bail out if stepping setup has failed */
        if (retval != ERROR_OK)
                return retval;
 
-       target->debug_reason = DBG_REASON_SINGLESTEP;
-       retval = aarch64_resume(target, 1, address, 0, 0);
+       if (target->smp && (current == 1)) {
+               /*
+                * isolate current target so that it doesn't get resumed
+                * together with the others
+                */
+               retval = arm_cti_gate_channel(armv8->cti, 1);
+               /* resume all other targets in the group */
+               if (retval == ERROR_OK)
+                       retval = aarch64_step_restart_smp(target);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Failed to restart non-stepping targets in SMP group");
+                       return retval;
+               }
+               LOG_DEBUG("Restarted all non-stepping targets in SMP group");
+       }
+
+       /* all other targets running, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &address, 0, 0);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_LAZY);
+
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       while (target->state != TARGET_HALTED) {
-               mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DESR, &tmp);
-               LOG_DEBUG("DESR = %#x", tmp);
-               retval = aarch64_poll(target);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("timeout waiting for target halt");
-                       return ERROR_FAIL;
+       LOG_DEBUG("target step-resumed at 0x%" PRIx64, address);
+       if (!handle_breakpoints)
+               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
+
+       /* wait (at most 100 ms) until PRSR reports both SDR and HALT */
+       int64_t then = timeval_ms();
+       for (;;) {
+               int stepped;
+               uint32_t prsr;
+
+               retval = aarch64_check_state_one(target,
+                                       PRSR_SDR|PRSR_HALT, PRSR_SDR|PRSR_HALT, &stepped, &prsr);
+               if (retval != ERROR_OK || stepped)
+                       break;
+
+               if (timeval_ms() > then + 100) {
+                       LOG_ERROR("timeout waiting for target %s halt after step",
+                                       target_name(target));
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
                }
        }
 
+       /*
+        * At least on one SoC (Renesas R8A7795) stepping over a WFI instruction
+        * causes a timeout. The core takes the step but doesn't complete it and so
+        * debug state is never entered. However, you can manually halt the core
+        * as an external debug event is also a WFI wakeup event.
+        */
+       if (retval == ERROR_TARGET_TIMEOUT)
+               saved_retval = aarch64_halt_one(target, HALT_SYNC);
+
+       /* restore EDECR */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DECR, (tmp&(~0x4)));
+                       armv8->debug_base + CPUV8_DBG_EDECR, edecr);
        if (retval != ERROR_OK)
                return retval;
 
-       target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-       if (target->state == TARGET_HALTED)
-               LOG_DEBUG("target stepped");
+       /* restore interrupts */
+       if (aarch64->isrmasking_mode == AARCH64_ISRMASK_ON) {
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
+               if (retval != ERROR_OK)
+                       return retval;  /* report the failure, do not mask it as ERROR_OK */
+       }
+
+       /* a failed forced halt (after the step timeout) takes precedence over polling */
+       if (saved_retval != ERROR_OK)
+               return saved_retval;
 
-       return ERROR_OK;
+       return aarch64_poll(target);
 }
 
 static int aarch64_restore_context(struct target *target, bool bpwp)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
 
-       LOG_DEBUG(" ");
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
 
        if (armv8->pre_restore_context)
                armv8->pre_restore_context(target);
 
-       return arm_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       retval = armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       if (retval == ERROR_OK) {
+               /* registers are now invalid: the dirty registers were written
+                * back, so invalidate the core register cache and the cache
+                * linked after it (core_cache->next); on failure both caches
+                * are left untouched and the error is returned */
+               register_cache_invalidate(arm->core_cache);
+               register_cache_invalidate(arm->core_cache->next);
+       }
 
-       return ERROR_OK;
+       return retval;
 }
 
 /*
@@ -1229,7 +1217,6 @@ static int aarch64_set_breakpoint(struct target *target,
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct aarch64_brp *brp_list = aarch64->brp_list;
-       uint32_t dscr;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1257,18 +1244,18 @@ static int aarch64_set_breakpoint(struct target *target,
                bpt_value = brp_list[brp_i].value;
 
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUDBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
                                (uint32_t)(bpt_value & 0xFFFFFFFF));
                if (retval != ERROR_OK)
                        return retval;
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUDBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
                                (uint32_t)(bpt_value >> 32));
                if (retval != ERROR_OK)
                        return retval;
 
                retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                               + CPUDBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                               + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                                brp_list[brp_i].control);
                if (retval != ERROR_OK)
                        return retval;
@@ -1278,28 +1265,38 @@ static int aarch64_set_breakpoint(struct target *target,
 
        } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
-               buf_set_u32(code, 0, 32, 0xD4400000);
 
+               buf_set_u32(code, 0, 32, armv8_opcode(armv8, ARMV8_OPC_HLT));
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1,
                                breakpoint->orig_instr);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                retval = target_write_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1, code);
                if (retval != ERROR_OK)
                        return retval;
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                breakpoint->set = 0x11; /* Any nice value but 0 */
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUDBG_DSCR, &dscr);
        /* Ensure that halting debug mode is enable */
-       dscr = dscr | DSCR_HALT_DBG_MODE;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                        armv8->debug_base + CPUDBG_DSCR, dscr);
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Failed to set DSCR.HDE");
                return retval;
@@ -1335,18 +1332,19 @@ static int aarch64_set_context_breakpoint(struct target *target,
 
        breakpoint->set = brp_i + 1;
        control = ((matchmode & 0x7) << 20)
+               | (1 << 13)
                | (byte_addr_select << 5)
                | (3 << 1) | 1;
        brp_list[brp_i].used = 1;
        brp_list[brp_i].value = (breakpoint->asid);
        brp_list[brp_i].control = control;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].value);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].control);
        if (retval != ERROR_OK)
                return retval;
@@ -1407,30 +1405,36 @@ static int aarch64_set_hybrid_breakpoint(struct target *target, struct breakpoin
        brp_list[brp_1].value = (breakpoint->asid);
        brp_list[brp_1].control = control_CTX;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_1].BRPn,
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_1].BRPn,
                        brp_list[brp_1].value);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_1].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_1].BRPn,
                        brp_list[brp_1].control);
        if (retval != ERROR_OK)
                return retval;
 
        control_IVA = ((IVA_machmode & 0x7) << 20)
                | (brp_1 << 16)
+               | (1 << 13)
                | (IVA_byte_addr_select << 5)
                | (3 << 1) | 1;
        brp_list[brp_2].used = 1;
-       brp_list[brp_2].value = (breakpoint->address & 0xFFFFFFFC);
+       brp_list[brp_2].value = breakpoint->address & 0xFFFFFFFFFFFFFFFC;
        brp_list[brp_2].control = control_IVA;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_2].BRPn,
-                       brp_list[brp_2].value);
+                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_2].BRPn,
+                       brp_list[brp_2].value & 0xFFFFFFFF);
+       if (retval != ERROR_OK)
+               return retval;
+       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_2].BRPn,
+                       brp_list[brp_2].value >> 32);
        if (retval != ERROR_OK)
                return retval;
        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_2].BRPn,
+                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_2].BRPn,
                        brp_list[brp_2].control);
        if (retval != ERROR_OK)
                return retval;
@@ -1464,10 +1468,20 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
                        if ((brp_j < 0) || (brp_j >= aarch64->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
@@ -1478,10 +1492,21 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_j].value = 0;
                        brp_list[brp_j].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BCR_BASE + 16 * brp_list[brp_j].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_j].BRPn,
                                        brp_list[brp_j].control);
                        if (retval != ERROR_OK)
                                return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_j].BRPn,
+                                       (uint32_t)brp_list[brp_j].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_j].BRPn,
+                                       (uint32_t)brp_list[brp_j].value);
+                       if (retval != ERROR_OK)
+                               return retval;
+
                        breakpoint->linked_BRP = 0;
                        breakpoint->set = 0;
                        return ERROR_OK;
@@ -1498,20 +1523,31 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BCR_BASE + 16 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
                        retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
-                                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
+                                       + CPUV8_DBG_BVR_BASE + 16 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
+
+                       retval = aarch64_dap_write_memap_register_u32(target, armv8->debug_base
+                                       + CPUV8_DBG_BVR_BASE + 4 + 16 * brp_list[brp_i].BRPn,
+                                       (uint32_t)brp_list[brp_i].value);
+                       if (retval != ERROR_OK)
+                               return retval;
                        breakpoint->set = 0;
                        return ERROR_OK;
                }
        } else {
                /* restore original instruction (kept in target endianness) */
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
                if (breakpoint->length == 4) {
                        retval = target_write_memory(target,
                                        breakpoint->address & 0xFFFFFFFFFFFFFFFE,
@@ -1525,6 +1561,14 @@ static int aarch64_unset_breakpoint(struct target *target, struct breakpoint *br
                        if (retval != ERROR_OK)
                                return retval;
                }
+
+               armv8_cache_d_inner_flush_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
+
+               armv8_cache_i_inner_inval_virt(armv8,
+                               breakpoint->address & 0xFFFFFFFFFFFFFFFE,
+                               breakpoint->length);
        }
        breakpoint->set = 0;
 
@@ -1627,7 +1671,10 @@ static int aarch64_assert_reset(struct target *target)
        }
 
        /* registers are now invalid */
-       register_cache_invalidate(armv8->arm.core_cache);
+       if (target_was_examined(target)) {
+               register_cache_invalidate(armv8->arm.core_cache);
+               register_cache_invalidate(armv8->arm.core_cache->next);
+       }
 
        target->state = TARGET_RESET;
 
@@ -1643,289 +1690,438 @@ static int aarch64_deassert_reset(struct target *target)
        /* be certain SRST is off */
        jtag_add_reset(0, 0);
 
+       if (!target_was_examined(target))
+               return ERROR_OK;
+
        retval = aarch64_poll(target);
        if (retval != ERROR_OK)
                return retval;
 
+       retval = aarch64_init_debug_access(target);
+       if (retval != ERROR_OK)
+               return retval;
+
        if (target->reset_halt) {
                if (target->state != TARGET_HALTED) {
                        LOG_WARNING("%s: ran after reset and before halt ...",
                                target_name(target));
                        retval = target_halt(target);
-                       if (retval != ERROR_OK)
-                               return retval;
                }
        }
 
+       return retval;
+}
+
+static int aarch64_write_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
+       struct arm_dpm *dpm = &armv8->dpm;
+       int status;
+
+       /* register 1 receives every data item below, so flag it as modified */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* the item-by-item sequence needs the DCC in normal access mode;
+        * *dscr is the caller's copy of DSCR and is kept in sync */
+       if (*dscr & DSCR_MA) {
+               *dscr &= ~DSCR_MA;
+               status = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (status != ERROR_OK)
+                       return status;
+       }
+
+       for (; count != 0; buffer += size, --count) {
+               uint32_t item;
+
+               /* pick up one item; any size other than 1 or 2 is treated as 32 bit */
+               switch (size) {
+               case 1:
+                       item = *buffer;
+                       break;
+               case 2:
+                       item = target_buffer_get_u16(target, buffer);
+                       break;
+               default:
+                       item = target_buffer_get_u32(target, buffer);
+                       break;
+               }
+
+               /* hand the item to the core through DTRRX ... */
+               status = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRRX, item);
+               if (status != ERROR_OK)
+                       return status;
+
+               /* ... and let the core move it from the DTR into register 1 */
+               status = dpm->instr_execute(dpm,
+                               arm->core_state == ARM_STATE_AARCH64 ?
+                               ARMV8_MRS(SYSTEM_DBG_DTRRX_EL0, 1) :
+                               ARMV4_5_MRC(14, 0, 1, 0, 5, 0));
+               if (status != ERROR_OK)
+                       return status;
+
+               /* run the store instruction that matches the item width */
+               switch (size) {
+               case 1:
+                       status = dpm->instr_execute(dpm,
+                                       armv8_opcode(armv8, ARMV8_OPC_STRB_IP));
+                       break;
+               case 2:
+                       status = dpm->instr_execute(dpm,
+                                       armv8_opcode(armv8, ARMV8_OPC_STRH_IP));
+                       break;
+               default:
+                       status = dpm->instr_execute(dpm,
+                                       armv8_opcode(armv8, ARMV8_OPC_STRW_IP));
+                       break;
+               }
+               if (status != ERROR_OK)
+                       return status;
+       }
+
+       return ERROR_OK;
+}
+
+static int aarch64_write_cpu_memory_fast(struct target *target,
+       uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
+       int retval;
+       /* Bulk write path: 'count' 32-bit words from 'buffer' are pushed
+        * through DTRRX while the DCC is in memory access mode. '*dscr' is
+        * the caller's copy of DSCR; it is updated together with the register.
+        * Register 1 is flagged dirty first. */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* Step 1.d   - Change DCC to memory mode
+        * On a failure below, *dscr keeps the MA bit set, so the caller
+        * can tell that memory access mode may still be active. */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+
+       /* Step 2.a   - Do the write
+        * All words go to the same DTRRX address (no address increment on
+        * the debug AP); presumably each DTRRX write makes the core store
+        * the word and advance its address register - see the architecture
+        * manual. */
+       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
+                                       buffer, 4, count, armv8->debug_base + CPUV8_DBG_DTRRX);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 3.a   - Switch DTR mode back to Normal mode */
+       *dscr &= ~DSCR_MA;
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
        return ERROR_OK;
 }
 
-static int aarch64_write_apb_ab_memory(struct target *target,
+static int aarch64_write_cpu_memory(struct target *target,
        uint64_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
        /* write memory through APB-AP */
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
-       uint32_t i = 0;
 
-       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count%"  PRIu32,
-                         address, size, count);
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-
-       /* Mark register R0 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
+       armv8_reg_current(arm, 0)->dirty = true;
 
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
+       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap, armv8->debug_base + CPUDBG_DRCR, 1<<2);
+       /* Read DSCR */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       /* This algorithm comes from either :
-        * Cortex-A8 TRM Example 12-25
-        * Cortex-R4 TRM Example 11-26
-        * (slight differences)
-        */
-
-       /* The algorithm only copies 32 bit words, so the buffer
-        * should be expanded to include the words at either end.
-        * The first and last words will be read first to avoid
-        * corruption if needed.
-        */
-       tmp_buff = malloc(total_u32 * 4);
+       /* Set Normal access mode  */
+       dscr = (dscr & ~DSCR_MA);
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       if ((start_byte != 0) && (total_u32 > 1)) {
-               /* First bytes not aligned - read the 32 bit word to avoid corrupting
-                * the other bytes in the word.
-                */
-               retval = aarch64_read_apb_ab_memory(target, (address & ~0x3), 4, 1, tmp_buff);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Write X0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
+       } else {
+               /* Write R0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTRRX */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
        }
 
-       /* If end of write is not aligned, or the write is less than 4 bytes */
-       if ((end_byte != 0) ||
-               ((total_u32 == 1) && (total_bytes != 4))) {
-
-               /* Read the last word to avoid corruption during 32 bit write */
-               int mem_offset = (total_u32-1) * 4;
-               retval = aarch64_read_apb_ab_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
-       }
+       if (retval != ERROR_OK)
+               return retval;
 
-       /* Copy the write buffer over the top of the temporary buffer */
-       memcpy(&tmp_buff[start_byte], buffer, total_bytes);
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_write_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_write_cpu_memory_slow(target, size, count, buffer, &dscr);
 
-       /* We now have a 32 bit aligned buffer that can be written */
+       if (retval != ERROR_OK) {
+               /* Unset DTR mode */
+               mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
 
-       /* Read DSCR */
+       /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               goto error_free_buff_w;
-
-       /* Set DTR mode to Normal*/
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, dscr);
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
 
-       if (size > 4) {
-               LOG_WARNING("reading size >4 bytes not yet supported");
-               goto error_unset_dtr_w;
+       dpm->dscr = dscr;
+       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
+               /* Abort occurred - clear it and exit */
+               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
+               armv8_dpm_handle_exception(dpm, true);
+               return ERROR_FAIL;
        }
 
-       retval = aarch64_instr_write_data_dcc_64(arm->dpm, 0xd5330401, address+4);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+       /* Done */
+       return ERROR_OK;
+}
+
+/*
+ * Read 'count' items of 'size' bytes from target memory into 'buffer',
+ * one item per iteration: a load instruction is executed on the core, the
+ * loaded value is moved from register 1 into DTRTX, and DTRTX is then read
+ * through the debug AP. Any size other than 1 or 2 is treated as 32 bit.
+ *
+ * '*dscr' is the caller's copy of DSCR; it is updated if the DCC has to be
+ * switched back to normal mode. Returns ERROR_OK or the first error seen.
+ */
+static int aarch64_read_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode (if necessary) */
+       if (*dscr & DSCR_MA) {
+               /* clear only the MA bit; all other DSCR bits must be preserved */
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
 
-       dscr = DSCR_INSTR_COMP;
-       while (i < count * size) {
-               uint32_t val;
+       while (count) {
+               uint32_t opcode, data;
 
-               memcpy(&val, &buffer[i], size);
-               retval = aarch64_instr_write_data_dcc(arm->dpm, 0xd5330500, val);
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
                if (retval != ERROR_OK)
-                       goto error_unset_dtr_w;
+                       return retval;
 
-               retval = aarch64_exec_opcode(target, 0xb81fc020, &dscr);
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DTRTX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 1, 0, 5, 0));
                if (retval != ERROR_OK)
-                       goto error_unset_dtr_w;
+                       return retval;
 
-               retval = aarch64_exec_opcode(target, 0x91001021, &dscr);
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, &data);
                if (retval != ERROR_OK)
-                       goto error_unset_dtr_w;
+                       return retval;
+
+               if (size == 1)
+                       *buffer = (uint8_t)data;
+               else if (size == 2)
+                       target_buffer_set_u16(target, buffer, (uint16_t)data);
+               else
+                       target_buffer_set_u32(target, buffer, data);
 
-               i += 4;
+               /* Advance */
+               buffer += size;
+               --count;
        }
 
-       /* Check for sticky abort flags in the DSCR */
+       return ERROR_OK;
+}
+
+/*
+ * Read 'count' 32-bit words from target memory into 'buffer' using the DCC
+ * memory access mode: the first count-1 words are fetched with one bulk
+ * read of DTRTX, the last word is fetched after switching back to normal
+ * mode.
+ *
+ * '*dscr' is the caller's copy of DSCR and is updated together with the
+ * register; if an error occurs while memory mode is active it still has
+ * the MA bit set on return. A 'count' of 0 is a no-op returning ERROR_OK.
+ */
+static int aarch64_read_cpu_memory_fast(struct target *target,
+       uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+       uint32_t value;
+
+       /* The sequence below always transfers at least one word and does
+        * 'count--'; with count == 0 that would wrap around and overrun
+        * the buffer, so bail out early. */
+       if (count == 0)
+               return ERROR_OK;
+
+       /* Mark X1 as dirty */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
+       } else {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 1.e - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 1.f - read DBGDTRTX and discard the value */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
-       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
-               /* Abort occurred - clear it and exit */
-               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUDBG_DRCR, 1<<2);
-               goto error_free_buff_w;
+               return retval;
+
+       /* the last word is read separately in step 3.b */
+       count--;
+       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
+        * Abort flags are sticky, so can be read at end of transactions
+        *
+        * This data is read in aligned to 32 bit boundary.
+        */
+
+       if (count) {
+               /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
+                * increments X0 by 4. */
+               retval = mem_ap_read_buf_noincr(armv8->debug_ap, buffer, 4, count,
+                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
+               if (retval != ERROR_OK)
+                       return retval;
        }
 
-       /* Done */
-       free(tmp_buff);
-       return ERROR_OK;
+       /* Step 3.a - set DTR access mode back to Normal mode   */
+       *dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 3.b - read DBGDTRTX for the final value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       if (retval != ERROR_OK)
+               return retval;
 
-error_unset_dtr_w:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, dscr);
-error_free_buff_w:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
+       /* store the final word behind the count-1 words read in bulk */
+       target_buffer_set_u32(target, buffer + count * 4, value);
+       return retval;
 }
 
-static int aarch64_read_apb_ab_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
        /* read memory through APB-AP
         *
         * Reads 'count' items of 'size' bytes starting at 'address' into
         * 'buffer' by executing instructions on the core. The target must
         * be halted, otherwise ERROR_TARGET_NOT_HALTED is returned.
         * Word-sized requests at a 4-byte aligned address are handled by
         * aarch64_read_cpu_memory_fast(), everything else by
         * aarch64_read_cpu_memory_slow(). ERROR_FAIL is returned when DSCR
         * reports an error after the transfer.
         */
-
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       struct reg *reg;
-       uint32_t dscr, val;
-       uint8_t *tmp_buff = NULL;
-       uint32_t i = 0;
+       uint32_t dscr;
+
+       LOG_DEBUG("Reading CPU memory address 0x%016" PRIx64 " size %" PRIu32 " count %" PRIu32,
+                       address, size, count);
 
-       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count%"  PRIu32,
-                         address, size, count);
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       /* Mark register R0 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
+       armv8_reg_current(arm, 0)->dirty = true;
 
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-               armv8->debug_base + CPUDBG_DRCR, 1<<2);
+       /* Read DSCR */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               return retval;
+
+       /* This algorithm comes from DDI0487A.g, chapter J9.1 */
+
+       /* Set Normal access mode  */
+       dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Write X0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
+       } else {
+               /* Write R0 with value 'address' using write procedure */
+               /* Step 1.a+b - Write the address for read access into DBGDTRRXint */
+               /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
+       }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_r;
+               return retval;
+
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_read_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_read_cpu_memory_slow(target, size, count, buffer, &dscr);
        /* The helpers update dscr through the pointer. If it still has MA
         * set (the fast path returns early on errors while memory mode is
         * active), switch the DCC back to normal mode; the result of that
         * write is not checked. */
-       if (size > 4) {
-               LOG_WARNING("reading size >4 bytes not yet supported");
-               goto error_unset_dtr_r;
+       if (dscr & DSCR_MA) {
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
        }
 
-       while (i < count * size) {
+       if (retval != ERROR_OK)
+               return retval;
 
-               retval = aarch64_instr_write_data_dcc_64(arm->dpm, 0xd5330400, address+4);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+       /* Check for sticky abort flags in the DSCR
+        * (only reached when the transfer itself reported no error) */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-               dscr = DSCR_INSTR_COMP;
-               retval = aarch64_exec_opcode(target, 0xb85fc000, &dscr);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_DSCR, &dscr);
+       dpm->dscr = dscr;
 
-               retval = aarch64_instr_read_data_dcc(arm->dpm, 0xd5130400, &val);
-               if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
-               memcpy(&buffer[i], &val, size);
-               i += 4;
-               address += 4;
+       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
+               /* Abort occurred - clear it and exit */
+               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
+               armv8_dpm_handle_exception(dpm, true);
+               return ERROR_FAIL;
        }
 
-       /* Clear any sticky error */
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-               armv8->debug_base + CPUDBG_DRCR, 1<<2);
-
        /* Done */
        return ERROR_OK;
-
-error_unset_dtr_r:
-       LOG_WARNING("DSCR = 0x%" PRIx32, dscr);
-       /* Todo: Unset DTR mode */
-
-error_free_buff_r:
-       LOG_ERROR("error");
-       free(tmp_buff);
-
-       /* Clear any sticky error */
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-               armv8->debug_base + CPUDBG_DRCR, 1<<2);
-
-       return ERROR_FAIL;
 }
 
 static int aarch64_read_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       LOG_DEBUG("Reading memory at real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32,
-               address, size, count);
        /* a zero count or NULL buffer leaves retval at
         * ERROR_COMMAND_SYNTAX_ERROR, which is then returned */
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* read memory through AHB-AP */
-                       retval = mem_ap_read_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-                       /* read memory through APB-AP */
-                       retval = aarch64_mmu_modify(target, 0);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       retval = aarch64_read_apb_ab_memory(target, address, size, count, buffer);
-               }
+               /* read memory through APB-AP
+                * aarch64_mmu_modify(target, 0) is called first, presumably
+                * to turn the MMU off so that 'address' is used as a
+                * physical address; the read itself is then done by
+                * aarch64_read_cpu_memory(). */
+               retval = aarch64_mmu_modify(target, 0);
+               if (retval != ERROR_OK)
+                       return retval;
+               retval = aarch64_read_cpu_memory(target, address, size, count, buffer);
        }
        return retval;
 }
@@ -1934,136 +2130,34 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
 
        /* determine if MMU was enabled on target stop
         * aarch64_mmu() reports the state through mmu_enabled; a failure
         * here aborts the read before any memory access is attempted */
-       if (!armv8->is_armv7r) {
-               retval = aarch64_mmu(target, &mmu_enabled);
+       retval = aarch64_mmu(target, &mmu_enabled);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mmu_enabled) {
+               /* enable MMU as we could have disabled it for phys access
+                * (aarch64_read_phys_memory() calls aarch64_mmu_modify(target, 0));
+                * the read below then goes through aarch64_read_cpu_memory()
+                * with 'address' passed on unchanged */
+               retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Reading at virtual address. Translating v:0x%" TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR,
-                                 virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_read_phys_memory(target, address, size, count,
-                                                 buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys
-                          access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_read_apb_ab_memory(target, address, size,
-                                                   count, buffer);
-       }
-       return retval;
+       return aarch64_read_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_write_phys_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       uint8_t apsel = swjdp->apsel;
-
-       LOG_DEBUG("Writing memory to real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
 
        if (count && buffer) {
-
-               if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-
-                       /* write memory through AHB-AP */
-                       retval = mem_ap_write_buf(armv8->memory_ap, buffer, size, count, address);
-               } else {
-
-                       /* write memory through APB-AP */
-                       if (!armv8->is_armv7r) {
-                               retval = aarch64_mmu_modify(target, 0);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-                       return aarch64_write_apb_ab_memory(target, address, size, count, buffer);
-               }
-       }
-
-
-       /* REVISIT this op is generic ARMv7-A/R stuff */
-       if (retval == ERROR_OK && target->state == TARGET_HALTED) {
-               struct arm_dpm *dpm = armv8->arm.dpm;
-
-               retval = dpm->prepare(dpm);
+               /* write memory through APB-AP */
+               retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-
-               /* The Cache handling will NOT work with MMU active, the
-                * wrong addresses will be invalidated!
-                *
-                * For both ICache and DCache, walk all cache lines in the
-                * address range. Cortex-A8 has fixed 64 byte line length.
-                *
-                * REVISIT per ARMv7, these may trigger watchpoints ...
-                */
-
-               /* invalidate I-Cache */
-               if (armv8->armv8_mmu.armv8_cache.i_cache_enabled) {
-                       /* ICIMVAU - Invalidate Cache single entry
-                        * with MVA to PoU
-                        *      MCR p15, 0, r0, c7, c5, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 5, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* invalidate D-Cache */
-               if (armv8->armv8_mmu.armv8_cache.d_u_cache_enabled) {
-                       /* DCIMVAC - Invalidate data Cache line
-                        * with MVA to PoC
-                        *      MCR p15, 0, r0, c7, c6, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 6, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
-
-               /* (void) */ dpm->finish(dpm);
+               return aarch64_write_cpu_memory(target, address, size, count, buffer);
        }
 
        return retval;
@@ -2073,51 +2167,20 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, const uint8_t *buffer)
 {
        int mmu_enabled = 0;
-       target_addr_t virt, phys;
        int retval;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Writing memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32
-                 "; count %" PRId32, address, size, count);
 
        /* determine if MMU was enabled on target stop */
-       if (!armv8->is_armv7r) {
-               retval = aarch64_mmu(target, &mmu_enabled);
+       retval = aarch64_mmu(target, &mmu_enabled);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mmu_enabled) {
+               /* enable MMU as we could have disabled it for phys access */
+               retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               LOG_DEBUG("Writing memory to address 0x%" TARGET_PRIxADDR "; size %"
-                         PRId32 "; count %" PRId32, address, size, count);
-               if (mmu_enabled) {
-                       virt = address;
-                       retval = aarch64_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
-
-                       LOG_DEBUG("Writing to virtual address. Translating v:0x%"
-                                 TARGET_PRIxADDR " to r:0x%" TARGET_PRIxADDR, virt, phys);
-                       address = phys;
-               }
-               retval = aarch64_write_phys_memory(target, address, size,
-                               count, buffer);
-       } else {
-               if (mmu_enabled) {
-                       retval = aarch64_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /* enable MMU as we could have disabled it for phys access */
-                       retval = aarch64_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = aarch64_write_apb_ab_memory(target, address, size, count, buffer);
-       }
-       return retval;
+       return aarch64_write_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_handle_target_request(void *priv)
@@ -2135,16 +2198,16 @@ static int aarch64_handle_target_request(void *priv)
                uint32_t request;
                uint32_t dscr;
                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
 
                /* check if we have data */
                while ((dscr & DSCR_DTR_TX_FULL) && (retval == ERROR_OK)) {
                        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUDBG_DTRTX, &request);
+                                       armv8->debug_base + CPUV8_DBG_DTRTX, &request);
                        if (retval == ERROR_OK) {
                                target_request(target, request);
                                retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                               armv8->debug_base + CPUDBG_DSCR, &dscr);
+                                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
                        }
                }
        }
@@ -2157,16 +2220,13 @@ static int aarch64_examine_first(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct adiv5_dap *swjdp = armv8->arm.dap;
-       int retval = ERROR_OK;
-       uint32_t pfr, debug, ctypr, ttypr, cpuid;
+       struct aarch64_private_config *pc;
        int i;
-
-       /* We do one extra read to ensure DAP is configured,
-        * we call ahbap_debugport_init(swjdp) instead
-        */
-       retval = dap_dp_init(swjdp);
-       if (retval != ERROR_OK)
-               return retval;
+       int retval = ERROR_OK;
+       uint64_t debug, ttypr;
+       uint32_t cpuid;
+       uint32_t tmp0, tmp1, tmp2, tmp3;
+       debug = ttypr = cpuid = 0;
 
        /* Search for the APB-AB - it is needed for access to debug registers */
        retval = dap_find_ap(swjdp, AP_TYPE_APB_AP, &armv8->debug_ap);
@@ -2181,21 +2241,7 @@ static int aarch64_examine_first(struct target *target)
                return retval;
        }
 
-       armv8->debug_ap->memaccess_tck = 80;
-
-       /* Search for the AHB-AB */
-       armv8->memory_ap_available = false;
-       retval = dap_find_ap(swjdp, AP_TYPE_AHB_AP, &armv8->memory_ap);
-       if (retval == ERROR_OK) {
-               retval = mem_ap_init(armv8->memory_ap);
-               if (retval == ERROR_OK)
-                       armv8->memory_ap_available = true;
-       }
-       if (retval != ERROR_OK) {
-               /* AHB-AP not found or unavailable - use the CPU */
-               LOG_DEBUG("No AHB-AP available for memory access");
-       }
-
+       armv8->debug_ap->memaccess_tck = 10;
 
        if (!target->dbgbase_set) {
                uint32_t dbgbase;
@@ -2210,85 +2256,73 @@ static int aarch64_examine_first(struct target *target)
                                &armv8->debug_base, &coreidx);
                if (retval != ERROR_OK)
                        return retval;
-               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32,
-                         coreidx, armv8->debug_base);
+               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32
+                               " apid: %08" PRIx32, coreidx, armv8->debug_base, apid);
        } else
                armv8->debug_base = target->dbgbase;
 
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x300, 0);
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "oslock");
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x88, &cpuid);
-       LOG_DEBUG("0x88 = %x", cpuid);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x314, &cpuid);
-       LOG_DEBUG("0x314 = %x", cpuid);
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + 0x310, &cpuid);
-       LOG_DEBUG("0x310 = %x", cpuid);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_CPUID, &cpuid);
+       retval = mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_MAINID0, &cpuid);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "CPUID");
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_CTYPR, &ctypr);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "CTYPR");
-               return retval;
-       }
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUDBG_TTYPR, &ttypr);
+       retval = mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_MEMFEATURE0, &tmp0);
+       retval += mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_MEMFEATURE0 + 4, &tmp1);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "TTYPR");
+               LOG_DEBUG("Examine %s failed", "Memory Model Type");
                return retval;
        }
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + ID_AA64PFR0_EL1, &pfr);
+       retval = mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0, &tmp2);
+       retval += mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0 + 4, &tmp3);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
                return retval;
        }
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + ID_AA64DFR0_EL1, &debug);
+
+       retval = dap_run(armv8->debug_ap->dap);
        if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
+               LOG_ERROR("%s: examination failed\n", target_name(target));
                return retval;
        }
 
+       ttypr |= tmp1;
+       ttypr = (ttypr << 32) | tmp0;
+       debug |= tmp3;
+       debug = (debug << 32) | tmp2;
+
        LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
-       LOG_DEBUG("ctypr = 0x%08" PRIx32, ctypr);
-       LOG_DEBUG("ttypr = 0x%08" PRIx32, ttypr);
-       LOG_DEBUG("ID_AA64PFR0_EL1 = 0x%08" PRIx32, pfr);
-       LOG_DEBUG("ID_AA64DFR0_EL1 = 0x%08" PRIx32, debug);
+       LOG_DEBUG("ttypr = 0x%08" PRIx64, ttypr);
+       LOG_DEBUG("debug = 0x%08" PRIx64, debug);
+
+       if (target->private_config == NULL)
+               return ERROR_FAIL;
+
+       pc = (struct aarch64_private_config *)target->private_config;
+       if (pc->cti == NULL)
+               return ERROR_FAIL;
+
+       armv8->cti = pc->cti;
 
-       armv8->arm.core_type = ARM_MODE_MON;
-       armv8->arm.core_state = ARM_STATE_AARCH64;
        retval = aarch64_dpm_setup(aarch64, debug);
        if (retval != ERROR_OK)
                return retval;
 
        /* Setup Breakpoint Register Pairs */
-       aarch64->brp_num = ((debug >> 12) & 0x0F) + 1;
-       aarch64->brp_num_context = ((debug >> 28) & 0x0F) + 1;
-
-       /* hack - no context bpt support yet */
-       aarch64->brp_num_context = 0;
-
+       aarch64->brp_num = (uint32_t)((debug >> 12) & 0x0F) + 1;
+       aarch64->brp_num_context = (uint32_t)((debug >> 28) & 0x0F) + 1;
        aarch64->brp_num_available = aarch64->brp_num;
        aarch64->brp_list = calloc(aarch64->brp_num, sizeof(struct aarch64_brp));
        for (i = 0; i < aarch64->brp_num; i++) {
@@ -2304,6 +2338,9 @@ static int aarch64_examine_first(struct target *target)
 
        LOG_DEBUG("Configured %i hw breakpoints", aarch64->brp_num);
 
+       target->state = TARGET_UNKNOWN;
+       target->debug_reason = DBG_REASON_NOTHALTED;
+       aarch64->isrmasking_mode = AARCH64_ISRMASK_ON;
        target_set_examined(target);
        return ERROR_OK;
 }
@@ -2331,60 +2368,67 @@ static int aarch64_init_target(struct command_context *cmd_ctx,
        struct target *target)
 {
        /* examine_first() does a bunch of this */
+       arm_semihosting_init(target);
        return ERROR_OK;
 }
 
 static int aarch64_init_arch_info(struct target *target,
-       struct aarch64_common *aarch64, struct jtag_tap *tap)
+       struct aarch64_common *aarch64, struct adiv5_dap *dap)
 {
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct adiv5_dap *dap = armv8->arm.dap;
-
-       armv8->arm.dap = dap;
 
        /* Setup struct aarch64_common */
        aarch64->common_magic = AARCH64_COMMON_MAGIC;
-       /*  tap has no dap initialized */
-       if (!tap->dap) {
-               tap->dap = dap_init();
-
-               /* Leave (only) generic DAP stuff for debugport_init() */
-               tap->dap->tap = tap;
-       }
-
-       armv8->arm.dap = tap->dap;
-
-       aarch64->fast_reg_read = 0;
+       armv8->arm.dap = dap;
 
        /* register arch-specific functions */
        armv8->examine_debug_reason = NULL;
-
        armv8->post_debug_entry = aarch64_post_debug_entry;
-
        armv8->pre_restore_context = NULL;
-
        armv8->armv8_mmu.read_physical_memory = aarch64_read_phys_memory;
 
-       /* REVISIT v7a setup should be in a v7a-specific routine */
        armv8_init_arch_info(target, armv8);
-       target_register_timer_callback(aarch64_handle_target_request, 1, 1, target);
+       target_register_timer_callback(aarch64_handle_target_request, 1,
+               TARGET_TIMER_TYPE_PERIODIC, target);
 
        return ERROR_OK;
 }
 
 static int aarch64_target_create(struct target *target, Jim_Interp *interp)
 {
-       struct aarch64_common *aarch64 = calloc(1, sizeof(struct aarch64_common));
+       struct aarch64_private_config *pc = target->private_config;
+       struct aarch64_common *aarch64;
+
+       if (adiv5_verify_config(&pc->adiv5_config) != ERROR_OK)
+               return ERROR_FAIL;
+
+       aarch64 = calloc(1, sizeof(struct aarch64_common));
+       if (aarch64 == NULL) {
+               LOG_ERROR("Out of memory");
+               return ERROR_FAIL;
+       }
+
+       return aarch64_init_arch_info(target, aarch64, pc->adiv5_config.dap);
+}
 
-       aarch64->armv8_common.is_armv7r = false;
+static void aarch64_deinit_target(struct target *target)
+{
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       struct armv8_common *armv8 = &aarch64->armv8_common;
+       struct arm_dpm *dpm = &armv8->dpm;
 
-       return aarch64_init_arch_info(target, aarch64, target->tap);
+       armv8_free_reg_cache(target);
+       free(aarch64->brp_list);
+       free(dpm->dbp);
+       free(dpm->dwp);
+       free(target->private_config);
+       free(aarch64);
 }
 
 static int aarch64_mmu(struct target *target, int *enabled)
 {
        if (target->state != TARGET_HALTED) {
-               LOG_ERROR("%s: target not halted", __func__);
+               LOG_ERROR("%s: target %s not halted", __func__, target_name(target));
                return ERROR_TARGET_INVALID;
        }
 
@@ -2395,26 +2439,95 @@ static int aarch64_mmu(struct target *target, int *enabled)
 static int aarch64_virt2phys(struct target *target, target_addr_t virt,
                             target_addr_t *phys)
 {
-       int retval = ERROR_FAIL;
-       struct armv8_common *armv8 = target_to_armv8(target);
-       struct adiv5_dap *swjdp = armv8->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       if (armv8->memory_ap_available && (apsel == armv8->memory_ap->ap_num)) {
-               uint32_t ret;
-               retval = armv8_mmu_translate_va(target,
-                               virt, &ret);
-               if (retval != ERROR_OK)
-                       goto done;
-               *phys = ret;
-       } else {/*  use this method if armv8->memory_ap not selected
-                *  mmu must be enable in order to get a correct translation */
-               retval = aarch64_mmu_modify(target, 1);
-               if (retval != ERROR_OK)
-                       goto done;
-               retval = armv8_mmu_translate_va_pa(target, virt,  phys, 1);
+       return armv8_mmu_translate_va_pa(target, virt, phys, 1);
+}
+
+/*
+ * private target configuration items
+ */
+enum aarch64_cfg_param {
+       CFG_CTI,
+};
+
+static const Jim_Nvp nvp_config_opts[] = {
+       { .name = "-cti", .value = CFG_CTI },
+       { .name = NULL, .value = -1 }
+};
+
+static int aarch64_jim_configure(struct target *target, Jim_GetOptInfo *goi)
+{
+       struct aarch64_private_config *pc;
+       Jim_Nvp *n;
+       int e;
+
+       pc = (struct aarch64_private_config *)target->private_config;
+       if (pc == NULL) {
+                       pc = calloc(1, sizeof(struct aarch64_private_config));
+                       target->private_config = pc;
        }
-done:
-       return retval;
+
+       /*
+        * Call adiv5_jim_configure() to parse the common DAP options
+        * It will return JIM_CONTINUE if it didn't find any known
+        * options, JIM_OK if it correctly parsed the topmost option
+        * and JIM_ERR if an error occured during parameter evaluation.
+        * For JIM_CONTINUE, we check our own params.
+        */
+       e = adiv5_jim_configure(target, goi);
+       if (e != JIM_CONTINUE)
+               return e;
+
+       /* parse config or cget options ... */
+       if (goi->argc > 0) {
+               Jim_SetEmptyResult(goi->interp);
+
+               /* check first if topmost item is for us */
+               e = Jim_Nvp_name2value_obj(goi->interp, nvp_config_opts,
+                               goi->argv[0], &n);
+               if (e != JIM_OK)
+                       return JIM_CONTINUE;
+
+               e = Jim_GetOpt_Obj(goi, NULL);
+               if (e != JIM_OK)
+                       return e;
+
+               switch (n->value) {
+               case CFG_CTI: {
+                       if (goi->isconfigure) {
+                               Jim_Obj *o_cti;
+                               struct arm_cti *cti;
+                               e = Jim_GetOpt_Obj(goi, &o_cti);
+                               if (e != JIM_OK)
+                                       return e;
+                               cti = cti_instance_by_jim_obj(goi->interp, o_cti);
+                               if (cti == NULL) {
+                                       Jim_SetResultString(goi->interp, "CTI name invalid!", -1);
+                                       return JIM_ERR;
+                               }
+                               pc->cti = cti;
+                       } else {
+                               if (goi->argc != 0) {
+                                       Jim_WrongNumArgs(goi->interp,
+                                                       goi->argc, goi->argv,
+                                                       "NO PARAMS");
+                                       return JIM_ERR;
+                               }
+
+                               if (pc == NULL || pc->cti == NULL) {
+                                       Jim_SetResultString(goi->interp, "CTI not configured", -1);
+                                       return JIM_ERR;
+                               }
+                               Jim_SetResultString(goi->interp, arm_cti_name(pc->cti), -1);
+                       }
+                       break;
+               }
+
+               default:
+                       return JIM_CONTINUE;
+               }
+       }
+
+       return JIM_OK;
 }
 
 COMMAND_HANDLER(aarch64_handle_cache_info_command)
@@ -2474,27 +2587,171 @@ COMMAND_HANDLER(aarch64_handle_smp_on_command)
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(aarch64_handle_smp_gdb_command)
+COMMAND_HANDLER(aarch64_mask_interrupts_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       int retval = ERROR_OK;
-       struct target_list *head;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               if (CMD_ARGC == 1) {
-                       int coreid = 0;
-                       COMMAND_PARSE_NUMBER(int, CMD_ARGV[0], coreid);
-                       if (ERROR_OK != retval)
-                               return retval;
-                       target->gdb_service->core[1] = coreid;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
 
+       static const Jim_Nvp nvp_maskisr_modes[] = {
+               { .name = "off", .value = AARCH64_ISRMASK_OFF },
+               { .name = "on", .value = AARCH64_ISRMASK_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_maskisr_modes, CMD_ARGV[0]);
+               if (n->name == NULL) {
+                       LOG_ERROR("Unknown parameter: %s - should be off or on", CMD_ARGV[0]);
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
-               command_print(CMD_CTX, "gdb coreid  %" PRId32 " -> %" PRId32, target->gdb_service->core[0]
-                       , target->gdb_service->core[1]);
+
+               aarch64->isrmasking_mode = n->value;
        }
+
+       n = Jim_Nvp_value2name_simple(nvp_maskisr_modes, aarch64->isrmasking_mode);
+       command_print(CMD_CTX, "aarch64 interrupt mask %s", n->name);
+
        return ERROR_OK;
 }
 
+static int jim_mcrmrc(Jim_Interp *interp, int argc, Jim_Obj * const *argv)
+{
+       struct command_context *context;
+       struct target *target;
+       struct arm *arm;
+       int retval;
+       bool is_mcr = false;
+       int arg_cnt = 0;
+
+       if (Jim_CompareStringImmediate(interp, argv[0], "mcr")) {
+               is_mcr = true;
+               arg_cnt = 7;
+       } else {
+               arg_cnt = 6;
+       }
+
+       context = current_command_context(interp);
+       assert(context != NULL);
+
+       target = get_current_target(context);
+       if (target == NULL) {
+               LOG_ERROR("%s: no current target", __func__);
+               return JIM_ERR;
+       }
+       if (!target_was_examined(target)) {
+               LOG_ERROR("%s: not yet examined", target_name(target));
+               return JIM_ERR;
+       }
+
+       arm = target_to_arm(target);
+       if (!is_arm(arm)) {
+               LOG_ERROR("%s: not an ARM", target_name(target));
+               return JIM_ERR;
+       }
+
+       if (target->state != TARGET_HALTED)
+               return ERROR_TARGET_NOT_HALTED;
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               LOG_ERROR("%s: not 32-bit arm target", target_name(target));
+               return JIM_ERR;
+       }
+
+       if (argc != arg_cnt) {
+               LOG_ERROR("%s: wrong number of arguments", __func__);
+               return JIM_ERR;
+       }
+
+       int cpnum;
+       uint32_t op1;
+       uint32_t op2;
+       uint32_t CRn;
+       uint32_t CRm;
+       uint32_t value;
+       long l;
+
+       /* NOTE:  parameter sequence matches ARM instruction set usage:
+        *      MCR     pNUM, op1, rX, CRn, CRm, op2    ; write CP from rX
+        *      MRC     pNUM, op1, rX, CRn, CRm, op2    ; read CP into rX
+        * The "rX" is necessarily omitted; it uses Tcl mechanisms.
+        */
+       retval = Jim_GetLong(interp, argv[1], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "coprocessor", (int) l);
+               return JIM_ERR;
+       }
+       cpnum = l;
+
+       retval = Jim_GetLong(interp, argv[2], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0x7) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "op1", (int) l);
+               return JIM_ERR;
+       }
+       op1 = l;
+
+       retval = Jim_GetLong(interp, argv[3], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "CRn", (int) l);
+               return JIM_ERR;
+       }
+       CRn = l;
+
+       retval = Jim_GetLong(interp, argv[4], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "CRm", (int) l);
+               return JIM_ERR;
+       }
+       CRm = l;
+
+       retval = Jim_GetLong(interp, argv[5], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0x7) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "op2", (int) l);
+               return JIM_ERR;
+       }
+       op2 = l;
+
+       value = 0;
+
+       if (is_mcr == true) {
+               retval = Jim_GetLong(interp, argv[6], &l);
+               if (retval != JIM_OK)
+                       return retval;
+               value = l;
+
+               /* NOTE: parameters reordered! */
+               /* ARMV4_5_MCR(cpnum, op1, 0, CRn, CRm, op2) */
+               retval = arm->mcr(target, cpnum, op1, op2, CRn, CRm, value);
+               if (retval != ERROR_OK)
+                       return JIM_ERR;
+       } else {
+               /* NOTE: parameters reordered! */
+               /* ARMV4_5_MRC(cpnum, op1, 0, CRn, CRm, op2) */
+               retval = arm->mrc(target, cpnum, op1, op2, CRn, CRm, &value);
+               if (retval != ERROR_OK)
+                       return JIM_ERR;
+
+               Jim_SetResult(interp, Jim_NewIntObj(interp, value));
+       }
+
+       return JIM_OK;
+}
+
 static const struct command_registration aarch64_exec_command_handlers[] = {
        {
                .name = "cache_info",
@@ -2524,27 +2781,39 @@ static const struct command_registration aarch64_exec_command_handlers[] = {
                .usage = "",
        },
        {
-               .name = "smp_gdb",
-               .handler = aarch64_handle_smp_gdb_command,
+               .name = "maskisr",
+               .handler = aarch64_mask_interrupts_command,
+               .mode = COMMAND_ANY,
+               .help = "mask aarch64 interrupts during single-step",
+               .usage = "['on'|'off']",
+       },
+       {
+               .name = "mcr",
                .mode = COMMAND_EXEC,
-               .help = "display/fix current core played to gdb",
-               .usage = "",
+               .jim_handler = jim_mcrmrc,
+               .help = "write coprocessor register",
+               .usage = "cpnum op1 CRn CRm op2 value",
+       },
+       {
+               .name = "mrc",
+               .mode = COMMAND_EXEC,
+               .jim_handler = jim_mcrmrc,
+               .help = "read coprocessor register",
+               .usage = "cpnum op1 CRn CRm op2",
        },
 
 
        COMMAND_REGISTRATION_DONE
 };
+
 static const struct command_registration aarch64_command_handlers[] = {
-       {
-               .chain = arm_command_handlers,
-       },
        {
                .chain = armv8_command_handlers,
        },
        {
-               .name = "cortex_a",
+               .name = "aarch64",
                .mode = COMMAND_ANY,
-               .help = "Cortex-A command group",
+               .help = "Aarch64 command group",
                .usage = "",
                .chain = aarch64_exec_command_handlers,
        },
@@ -2565,16 +2834,12 @@ struct target_type aarch64_target = {
        .deassert_reset = aarch64_deassert_reset,
 
        /* REVISIT allow exporting VFP3 registers ... */
+       .get_gdb_arch = armv8_get_gdb_arch,
        .get_gdb_reg_list = armv8_get_gdb_reg_list,
 
        .read_memory = aarch64_read_memory,
        .write_memory = aarch64_write_memory,
 
-       .checksum_memory = arm_checksum_memory,
-       .blank_check_memory = arm_blank_check_memory,
-
-       .run_algorithm = armv4_5_run_algorithm,
-
        .add_breakpoint = aarch64_add_breakpoint,
        .add_context_breakpoint = aarch64_add_context_breakpoint,
        .add_hybrid_breakpoint = aarch64_add_hybrid_breakpoint,
@@ -2584,7 +2849,9 @@ struct target_type aarch64_target = {
 
        .commands = aarch64_command_handlers,
        .target_create = aarch64_target_create,
+       .target_jim_configure = aarch64_jim_configure,
        .init_target = aarch64_init_target,
+       .deinit_target = aarch64_deinit_target,
        .examine = aarch64_examine,
 
        .read_phys_memory = aarch64_read_phys_memory,

Linking to existing account procedure

If you already have an account and want to add another login method, you MUST first sign in with your existing account and then change the URL to read https://review.openocd.org/login/?link to get to this page again; this time linking will work. Thank you.

SSH host key fingerprints

1024 SHA256:YKx8b7u5ZWdcbp7/4AeXNaqElP49m6QrwfXaqQGJAOk gerrit-code-review@openocd.zylin.com (DSA)
384 SHA256:jHIbSQa4REvwCFG4cq5LBlBLxmxSqelQPem/EXIrxjk gerrit-code-review@openocd.org (ECDSA)
521 SHA256:UAOPYkU9Fjtcao0Ul/Rrlnj/OsQvt+pgdYSZ4jOYdgs gerrit-code-review@openocd.org (ECDSA)
256 SHA256:A13M5QlnozFOvTllybRZH6vm7iSt0XLxbA48yfc2yfY gerrit-code-review@openocd.org (ECDSA)
256 SHA256:spYMBqEYoAOtK7yZBrcwE8ZpYt6b68Cfh9yEVetvbXg gerrit-code-review@openocd.org (ED25519)
+--[ED25519 256]--+
|=..              |
|+o..   .         |
|*.o   . .        |
|+B . . .         |
|Bo. = o S        |
|Oo.+ + =         |
|oB=.* = . o      |
| =+=.+   + E     |
|. .=o   . o      |
+----[SHA256]-----+
2048 SHA256:0Onrb7/PHjpo6iVZ7xQX2riKN83FJ3KGU0TvI0TaFG4 gerrit-code-review@openocd.zylin.com (RSA)