diff --git a/agent/machine/microblaze/tcf/cpudefs-mdep.c b/agent/machine/microblaze/tcf/cpudefs-mdep.c
index 90781f9..3273fa0 100644
--- a/agent/machine/microblaze/tcf/cpudefs-mdep.c
+++ b/agent/machine/microblaze/tcf/cpudefs-mdep.c
@@ -23,6 +23,7 @@
 #include <tcf/framework/cpudefs.h>
 #include <tcf/framework/context.h>
 #include <tcf/framework/myalloc.h>
+#include <tcf/services/runctrl.h>
 #include <machine/microblaze/tcf/stack-crawl-microblaze.h>
 #include <machine/microblaze/tcf/disassembler-microblaze.h>
 #if ENABLE_ContextMux
@@ -30,21 +31,30 @@
 #endif
 #include <tcf/cpudefs-mdep.h>
 
+typedef struct ContextExtensionMicroBlaze { /* Software single-step state stored in the context extension area (accessed via EXT) */
+    int sw_stepping; /* non-zero while a temporary BREAK_INST is planted at addr */
+    char opcode[sizeof(BREAK_INST)]; /* original memory bytes saved from addr before BREAK_INST was written */
+    ContextAddress addr; /* predicted next-PC address where the temporary breakpoint lives */
+} ContextExtensionMicroBlaze;
+
+static size_t context_extension_offset = 0;
+
+#define EXT(ctx) ((ContextExtensionMicroBlaze *)((char *)(ctx) + context_extension_offset))
+
 RegisterDefinition * regs_index = NULL;
 unsigned char BREAK_INST[] = { 0, 0, 0, 0 };
 
 static RegisterDefinition * reg_pc = NULL;
 static unsigned regs_cnt = 0;
 static unsigned regs_max = 0;
-static size_t regs_offs = 0;
+
+#define REG_OFFSET(name) offsetof(REG_SET, name)
 
 static RegisterDefinition * alloc_reg(size_t size) {
     assert(regs_cnt < regs_max - 1);
-    regs_index[regs_cnt].offset = regs_offs;
     regs_index[regs_cnt].size = size;
     regs_index[regs_cnt].dwarf_id = -1;
     regs_index[regs_cnt].eh_frame_id = -1;
-    regs_offs += size;
     return regs_index + regs_cnt++;
 }
 
@@ -56,30 +66,30 @@
     return grp;
 }
 
-static RegisterDefinition * alloc_spr(RegisterDefinition * grp, const char * name, size_t size, int id, const char * desc) {
+static RegisterDefinition * alloc_spr(RegisterDefinition * grp, const char * name, size_t offset, size_t size, int id, const char * desc) { /* Allocates a named register under grp with explicit offset/size; id is used for both dwarf_id and eh_frame_id */
     RegisterDefinition * reg = alloc_reg(size);
     reg->parent = grp;
     reg->name = loc_strdup(name);
     reg->description = loc_strdup(desc);
     reg->dwarf_id = (int16_t)id;
     reg->eh_frame_id = (int16_t)id;
+    reg->offset = offset; /* caller-supplied byte offset; callers in this file pass REG_OFFSET(...), i.e. offsetof(REG_SET, ...) */
     return reg;
 }
 
 static void microblaze_create_reg_definitions(void) {
     unsigned i = 0;
+    RegisterDefinition * pvr = NULL;
 
-    regs_offs = 0;
     regs_cnt = 0;
     regs_max = 128;
     regs_index = (RegisterDefinition *)loc_alloc_zero(sizeof(RegisterDefinition) * regs_max);
 
     for (i = 0; i < 32; i++) {
-        char name[32];
         RegisterDefinition * r = alloc_reg(4);
-        snprintf(name, sizeof(name), "r%d", i);
-        r->name = loc_strdup(name);
+        r->name = loc_printf("r%d", i);
         r->dwarf_id = r->eh_frame_id = (int16_t)i;
+        r->offset = REG_OFFSET(user.regs.gpr) + i * 4;
         switch (i) {
         case 0: r->no_write = 1; break;
         case 1: r->role = "SP"; break;
@@ -87,27 +97,22 @@
         }
     }
 
-    reg_pc = alloc_spr(NULL, "pc", 4, 32, "Program Control Register");
+    reg_pc = alloc_spr(NULL, "pc", REG_OFFSET(user.regs.pc), 4, 32, "Program Control Register");
     reg_pc->role = "PC";
 
-    alloc_spr(NULL, "msr", 4, 33, "Machine Status Register");
-    alloc_spr(NULL, "ear", 4, 34, "Exception Address Register");
-    alloc_spr(NULL, "esr", 4, 35, "Exception Status Register");
-    alloc_spr(NULL, "fsr", 4, 36, "Floating Point Unit Status Register");
-    alloc_spr(NULL, "btr", 4, 37, "Exception Branch Taken Register");
+    alloc_spr(NULL, "msr", REG_OFFSET(user.regs.msr), 4, 33, "Machine Status Register");
+    alloc_spr(NULL, "ear", REG_OFFSET(user.regs.ear), 4, 34, "Exception Address Register");
+    alloc_spr(NULL, "esr", REG_OFFSET(user.regs.esr), 4, 35, "Exception Status Register");
+    alloc_spr(NULL, "fsr", REG_OFFSET(user.regs.fsr), 4, 36, "Floating Point Unit Status Register");
+    alloc_spr(NULL, "btr", REG_OFFSET(user.regs.btr), 4, 37, "Exception Branch Taken Register");
 
-    /* TODO: check if CPU configured with MMU */
-    {
-        RegisterDefinition * grp = alloc_group("mmu");
-        alloc_spr(grp, "pid", 4, 51, "Process Identifier Register");
-        alloc_spr(grp, "zpr", 4, 52, "Zone Protection Register");
-        alloc_spr(grp, "tlbx", 4, 53, "Translation Look-Aside Buffer Index Register");
-        alloc_spr(grp, "tlbsx", 4, 54, "Translation Look-Aside Buffer Search Index Register");
-        alloc_spr(grp, "tlblo", 4, 55, "Translation Look-Aside Buffer Low Register");
-        alloc_spr(grp, "tlbhi", 4, 56, "Translation Look-Aside Buffer High Register");
+    pvr = alloc_group("pvr");
+    for (i = 0; i < 12; i++) {
+        RegisterDefinition * r = alloc_reg(4);
+        r->name = loc_printf("pvr%d", i);
+        r->offset = REG_OFFSET(user.regs.pvr) + i * 4;
+        r->parent = pvr;
     }
-    alloc_spr(NULL, "slr", 4, 57, "Stack protection - Low pointer");
-    alloc_spr(NULL, "shr", 4, 58, "Stack protection - High pointer");
 }
 
 RegisterDefinition * get_PC_definition(Context * ctx) {
@@ -125,9 +130,186 @@
 }
 #endif
 
+#if ENABLE_external_stepping_mode
+static int read_reg(Context * ctx, RegisterDefinition * def, size_t size, ContextAddress * addr) {
+    uint8_t raw[8]; /* register bytes, least significant byte first */
+    uint64_t value = 0;
+    size_t pos = size;
+    *addr = 0;
+    if (def->dwarf_id == 0) return 0; /* register with DWARF id 0 (r0) is reported as 0 without reading it */
+    assert(!def->big_endian);
+    assert(size <= def->size);
+    assert(size <= sizeof(raw));
+    if (context_read_reg(ctx, def, 0, size, raw) < 0) return -1;
+    while (pos-- > 0) value = (value << 8) | raw[pos]; /* fold from the most significant byte down */
+    *addr = (ContextAddress)value;
+    return 0;
+}
+
+static int read_mem(Context * ctx, ContextAddress addr, uint32_t * data) {
+    uint8_t b[4]; /* four bytes as read from context memory at addr */
+    uint32_t word = 0;
+    if (context_read_mem(ctx, addr, &b, 4) < 0) return -1;
+    if (big_endian_host()) word = ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) | ((uint32_t)b[2] << 8) | (uint32_t)b[3];
+    else word = (uint32_t)b[0] | ((uint32_t)b[1] << 8) | ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
+    *data = word; /* only written on success */
+    return 0;
+}
+
+static int br_condition(uint32_t instr, ContextAddress data) { /* Returns non-zero when the conditional branch encoded in instr is taken for rA value data */
+    int32_t ra32 = (int32_t)(uint32_t)data; /* signed: branch conditions compare rA with 0 as two's complement; unsigned made <0 always false and >=0 always true */
+    int64_t ra64 = (int64_t)data; /* same, for the 64-bit condition codes 8..13 */
+    switch ((instr >> 21) & 0xf) { /* condition code from instruction bits 21..24 */
+    case  0: return ra32 == 0;
+    case  1: return ra32 != 0;
+    case  2: return ra32 < 0;
+    case  3: return ra32 <= 0;
+    case  4: return ra32 > 0;
+    case  5: return ra32 >= 0;
+    case  8: return ra64 == 0;
+    case  9: return ra64 != 0;
+    case 10: return ra64 < 0;
+    case 11: return ra64 <= 0;
+    case 12: return ra64 > 0;
+    case 13: return ra64 >= 0;
+    }
+    return 0; /* unknown condition code: treated as not taken */
+}
+
+static int get_next_address(Context * ctx, ContextAddress * next_addr) { /* Predicts the PC after the instruction at the current PC; returns 0 and sets *next_addr, or -1 if a register or memory read fails */
+    uint32_t instr = 0;
+    uint64_t imm = 0; /* immediate bits from an IMM/IMML prefix, later extended with the branch's own 16 bits */
+    unsigned imm_bits = 0; /* number of valid bits in imm, used for sign extension */
+    ContextAddress addr = 0;
+    ContextAddress instr_addr = 0;
+
+    /* Read opcode at PC */
+    if (read_reg(ctx, reg_pc, reg_pc->size, &addr) < 0) return -1;
+    if (read_mem(ctx, addr, &instr) < 0) return -1;
+    addr += 4;
+
+    /* Check for IMM and IMML instructions */
+    if ((instr & 0xffff0000) == 0xb0000000) {
+        imm_bits = 16;
+        imm = instr & 0xffff;
+        if (read_mem(ctx, addr, &instr) < 0) return -1;
+        addr += 4;
+    }
+    else if ((instr & 0xff000000) == 0xb2000000) {
+        imm_bits = 24;
+        imm = instr & 0xffffff;
+        if (read_mem(ctx, addr, &instr) < 0) return -1;
+        addr += 4;
+    }
+    instr_addr = addr - 4; /* address of the instruction decoded below: PC-relative targets are based on the branch itself, not on a preceding IMM/IMML prefix */
+
+    /* Check for branch and return instructions */
+    if ((instr & 0xfc0007ff) == 0x98000000) {
+        /* BR .. BRK */
+        ContextAddress rb_data = 0;
+        RegisterDefinition * rb_def = regs_index + ((instr >> 11) & 0x1f);
+        if (read_reg(ctx, rb_def, rb_def->size, &rb_data) < 0) return -1;
+        if (instr & (1 << 19)) {
+            addr = rb_data; /* bit 19 set: rB holds the absolute target */
+        }
+        else {
+            addr = instr_addr + rb_data;
+        }
+    }
+    else if ((instr & 0xfc0007ff) == 0x9c000000) {
+        /* BEQ .. BGED */
+        ContextAddress ra_data = 0;
+        RegisterDefinition * ra_def = regs_index + ((instr >> 16) & 0x1f);
+        if (read_reg(ctx, ra_def, ra_def->size, &ra_data) < 0) return -1;
+        if (br_condition(instr, ra_data)) {
+            ContextAddress rb_data = 0;
+            RegisterDefinition * rb_def = regs_index + ((instr >> 11) & 0x1f);
+            if (read_reg(ctx, rb_def, rb_def->size, &rb_data) < 0) return -1;
+            addr = instr_addr + rb_data;
+        }
+        else if (instr & (1 << 25)) {
+            addr += 4; /* not taken, bit 25 set (delay-slot form): step past the following instruction too */
+        }
+    }
+    else if ((instr & 0xfc000000) == 0xb4000000) {
+        /* RTSD .. RTED */
+        ContextAddress ra_data = 0;
+        RegisterDefinition * ra_def = regs_index + ((instr >> 16) & 0x1f);
+        if (read_reg(ctx, ra_def, ra_def->size, &ra_data) < 0) return -1;
+        imm = (imm << 16) | (instr & 0xffff);
+        imm_bits += 16;
+        if (imm & ((uint64_t)1 << (imm_bits - 1))) {
+            imm |= ~(((uint64_t)1 << imm_bits) - 1); /* sign-extend imm to 64 bits */
+        }
+        addr = ra_data + imm;
+    }
+    else if ((instr & 0xfc000000) == 0xb8000000) {
+        /* BRI .. BRKI */
+        imm = (imm << 16) | (instr & 0xffff);
+        imm_bits += 16;
+        if (imm & ((uint64_t)1 << (imm_bits - 1))) {
+            imm |= ~(((uint64_t)1 << imm_bits) - 1);
+        }
+        if (instr & (1 << 19)) {
+            addr = imm; /* bit 19 set: immediate is the absolute target */
+        }
+        else {
+            addr = instr_addr + imm;
+        }
+    }
+    else if ((instr & 0xfc000000) == 0xbc000000) {
+        /* BEQI .. BGEID */
+        ContextAddress ra_data = 0;
+        RegisterDefinition * ra_def = regs_index + ((instr >> 16) & 0x1f);
+        if (read_reg(ctx, ra_def, ra_def->size, &ra_data) < 0) return -1;
+        if (br_condition(instr, ra_data)) {
+            imm = (imm << 16) | (instr & 0xffff);
+            imm_bits += 16;
+            if (imm & ((uint64_t)1 << (imm_bits - 1))) {
+                imm |= ~(((uint64_t)1 << imm_bits) - 1);
+            }
+            addr = instr_addr + imm;
+        }
+        else if (instr & (1 << 25)) {
+            addr += 4; /* not taken, bit 25 set (delay-slot form): step past the following instruction too */
+        }
+    }
+
+    *next_addr = addr; /* any other instruction: falls through to the next sequential address */
+    return 0;
+}
+
+int cpu_enable_stepping_mode(Context * ctx, uint32_t * is_cont) { /* Emulates a single step by planting BREAK_INST at the predicted next PC; returns 0 on success, -1 on failure */
+    Context * grp = context_get_group(ctx, CONTEXT_GROUP_PROCESS); /* step state and memory access go through the process group context */
+    ContextExtensionMicroBlaze * ext = EXT(grp);
+    assert(!grp->exited);
+    assert(!ext->sw_stepping); /* only one temporary breakpoint may be active per process group */
+    if (get_next_address(ctx, &ext->addr) < 0) return -1;
+    if (context_read_mem(grp, ext->addr, ext->opcode, sizeof(BREAK_INST)) < 0) return -1; /* save the original bytes for cpu_disable_stepping_mode() */
+    if (context_write_mem(grp, ext->addr, BREAK_INST, sizeof(BREAK_INST)) < 0) return -1;
+    ext->sw_stepping = 1; /* set only after the breakpoint was written successfully */
+    run_ctrl_lock(); /* paired with run_ctrl_unlock() in cpu_disable_stepping_mode() */
+    *is_cont = 1; /* NOTE(review): presumably tells the caller to resume with a plain continue - confirm against the caller */
+    return 0;
+}
+
+int cpu_disable_stepping_mode(Context * ctx) {
+    /* Undo cpu_enable_stepping_mode(): release the run control lock and restore the saved bytes */
+    Context * proc = context_get_group(ctx, CONTEXT_GROUP_PROCESS);
+    ContextExtensionMicroBlaze * state = EXT(proc);
+    if (!state->sw_stepping) return 0;
+    run_ctrl_unlock();
+    state->sw_stepping = 0;
+    /* No memory write is attempted once the process group has exited */
+    if (proc->exited) return 0;
+    return context_write_mem(proc, state->addr, state->opcode, sizeof(BREAK_INST));
+}
+#endif
+
 void ini_cpudefs_mdep(void) {
     static uint8_t bkpt_le[4] = { 0x18, 0x00, 0x0c, 0xba };
     static uint8_t bkpt_be[4] = { 0xba, 0x0c, 0x00, 0x18 };
+    context_extension_offset = context_extension(sizeof(ContextExtensionMicroBlaze)); /* reserve the extension area that EXT() addresses */
     memcpy(BREAK_INST, big_endian_host() ? bkpt_be : bkpt_le, 4);
     microblaze_create_reg_definitions();
 }
diff --git a/agent/machine/microblaze/tcf/cpudefs-mdep.h b/agent/machine/microblaze/tcf/cpudefs-mdep.h
index 6fcc999..d5084c4 100644
--- a/agent/machine/microblaze/tcf/cpudefs-mdep.h
+++ b/agent/machine/microblaze/tcf/cpudefs-mdep.h
@@ -20,6 +20,10 @@
 extern void add_cpudefs_disassembler(Context * cpu_ctx);
 #endif
 
+#if !defined(ENABLE_external_stepping_mode)
+#define ENABLE_external_stepping_mode 1
+#endif
+
 extern RegisterDefinition * regs_index;
 extern unsigned char BREAK_INST[4];
 
diff --git a/agent/machine/microblaze/tcf/disassembler-microblaze.c b/agent/machine/microblaze/tcf/disassembler-microblaze.c
index 1a03a3a..0fc5e06 100644
--- a/agent/machine/microblaze/tcf/disassembler-microblaze.c
+++ b/agent/machine/microblaze/tcf/disassembler-microblaze.c
@@ -1497,14 +1497,14 @@
     return &dr;
 }
 
-DisassemblyResult * disassemble_microblaze(uint8_t * buf,
+DisassemblyResult * disassemble_microblaze(uint8_t * code, /* decodes with en_64_bit cleared */
         ContextAddress addr, ContextAddress size, DisassemblerParams * params) {
     en_64_bit = 0;
-    return disassemble_instr(buf, addr, size, params);
+    return disassemble_instr(code, addr, size, params); /* shared decoder, reads en_64_bit set above */
 }
 
-DisassemblyResult * disassemble_microblaze_x(uint8_t * buf,
+DisassemblyResult * disassemble_microblaze_x(uint8_t * code, /* decodes with en_64_bit set */
         ContextAddress addr, ContextAddress size, DisassemblerParams * params) {
     en_64_bit = 1;
-    return disassemble_instr(buf, addr, size, params);
+    return disassemble_instr(code, addr, size, params); /* shared decoder, reads en_64_bit set above */
 }
diff --git a/agent/machine/microblaze/tcf/regset-mdep.h b/agent/machine/microblaze/tcf/regset-mdep.h
index b31ecc0..e209b19 100644
--- a/agent/machine/microblaze/tcf/regset-mdep.h
+++ b/agent/machine/microblaze/tcf/regset-mdep.h
@@ -12,3 +12,6 @@
  * Contributors:
  *     Xilinx - initial API and implementation
  *******************************************************************************/
+
+/* offset to be applied to the PC after a software trap */
+#define TRAP_OFFSET 0
diff --git a/agent/msvc/agent-vc2015.vcxproj b/agent/msvc/agent-vc2015.vcxproj
index 5cfd82b..5e1d219 100644
--- a/agent/msvc/agent-vc2015.vcxproj
+++ b/agent/msvc/agent-vc2015.vcxproj
@@ -360,6 +360,7 @@
     <ClInclude Include="..\machine\a64\tcf\cpu-regs-gdb.h" />
     <ClInclude Include="..\machine\arm\tcf\cpu-regs-gdb.h" />
     <ClInclude Include="..\machine\i386\tcf\cpu-regs-gdb.h" />
+    <ClInclude Include="..\machine\microblazex\tcf\cpu-regs-gdb.h" />
     <ClInclude Include="..\machine\microblaze\tcf\cpu-regs-gdb.h" />
     <ClInclude Include="..\machine\microblaze\tcf\cpudefs-mdep.h" />
     <ClInclude Include="..\machine\microblaze\tcf\disassembler-microblaze.h" />
diff --git a/agent/msvc/agent-vc2015.vcxproj.filters b/agent/msvc/agent-vc2015.vcxproj.filters
index d081654..778182a 100644
--- a/agent/msvc/agent-vc2015.vcxproj.filters
+++ b/agent/msvc/agent-vc2015.vcxproj.filters
@@ -64,6 +64,9 @@
     <Filter Include="machine\powerpc">
       <UniqueIdentifier>{ff538629-61f5-41bb-907c-975bcb6161f1}</UniqueIdentifier>
     </Filter>
+    <Filter Include="machine\microblazex">
+      <UniqueIdentifier>{4e8ce030-4562-4d31-8bf8-3f970be0c354}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\tcf\framework\asyncreq.c">
@@ -837,5 +840,8 @@
     <ClInclude Include="..\machine\microblaze\tcf\disassembler-microblaze.h">
       <Filter>machine\microblaze</Filter>
     </ClInclude>
+    <ClInclude Include="..\machine\microblazex\tcf\cpu-regs-gdb.h">
+      <Filter>machine\microblazex</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/agent/system/GNU/Linux/tcf/context-linux.c b/agent/system/GNU/Linux/tcf/context-linux.c
index 1462d46..79d6fe8 100644
--- a/agent/system/GNU/Linux/tcf/context-linux.c
+++ b/agent/system/GNU/Linux/tcf/context-linux.c
@@ -921,12 +921,12 @@
         }
 #else
         if (i >= offsetof(REG_SET, user.regs) && i < offsetof(REG_SET, user.regs) + sizeof(ext->regs->user.regs)) {
-            if (ptrace(PTRACE_GETREGS, ext->pid, 0, &ext->regs->user.regs) < 0 && errno != ESRCH) {
-                err = errno;
-                break;
+            /* Try to read all registers at once */
+            if (ptrace(PTRACE_GETREGS, ext->pid, 0, &ext->regs->user.regs) == 0) {
+                memset(ext->regs_valid + offsetof(REG_SET, user.regs), 0xff, sizeof(ext->regs->user.regs));
+                continue;
             }
-            memset(ext->regs_valid + offsetof(REG_SET, user.regs), 0xff, sizeof(ext->regs->user.regs));
-            continue;
+            /* Did not work, use PTRACE_PEEKUSER to get one register at a time */
         }
         if (i >= offsetof(REG_SET, fp) && i < offsetof(REG_SET, fp) + sizeof(ext->regs->fp)) {
 #if defined(__arm__) || defined(__aarch64__)
