Home > Software engineering >  Launching core 1 on the RP2040 with baremetal assembly
Launching core 1 on the RP2040 with baremetal assembly

Time:09-06

As I understand the documentation, 2.8.2, the process of launching core 1 is to send a sequence of values, with the final 3 being the vector table, stack pointer, and entry point, over the FIFO, while core 1 will echo the values back to you.

From the c code provided by the documentation, I wrote out this assembly:

    .cpu cortex-m0
    .thumb
ent:
    ldr r0, =0x20001000
    mov sp, r0              @init stack pointer

    ldr r0, =0xe000ed08
    ldr r3, [r0]            @vector table offset register
core:
    mov r7, pc
    b fifo_drain
    sev
    mov r1, #0
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #0
    bne core

    mov r7, pc
    b fifo_drain
    sev
    mov r1, #0
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #0
    bne core

    mov r1, #1
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #1
    bne core

    mov r1, r3              @vector table
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, r3
    bne core

    mov r1, sp              @stack pointer
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, sp
    bne core

    mov r1, pc
    add r1, #2              @entry point
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read

    ldr r0, =0xd0000000
    ldr r1, [r0]
    cmp r1, #1
    beq led

The sequence of values sent over the FIFO is {0, 0, 1, vt, sp, ent}, and when the value isn't echoed back, the sequence starts over. The entry point is simply the last 4 lines, where the core reads the CPUID register from the SIO, and turns on the LED (GPIO25) if the cpu id is 1.

The sequence seems to get stuck in a loop at the vector table, which makes sense since I barely understand it, the FIFO just doesn't echo it back. Also, the documentation has a note next to the entry point that says "don't forget the thumb bit!", whatever that means.

Edit:

Updated code, same problem:

    .cpu cortex-m0
    .thumb
ent:
    ldr r0, =0x20001000
    mov sp, r0              @init stack pointer

    ldr r0, =0xe000ed08
    ldr r1, =0x20000000
    str r1, [r0]            @init vtor

    ldr r0, =0xd0000000
    ldr r1, [r0]
    cmp r1, #1
    beq led
    
    b core
    
.thumb_func
core:
    mov r7, pc
    b fifo_drain
    mov r1, #0
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #0
    bne core
    
    mov r7, pc
    b fifo_drain
    mov r1, #0
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #0
    bne core
    
    mov r1, #1
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #1
    bne core
    
    ldr r3, =0x20000000
    mov r1, r3              @vector table
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, r3
    bne core
    
    mov r1, sp              @stack pointer
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, sp
    bne core
    
    ldr r3, =0x20000001
    mov r1, r3              @entry point
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, r3
    bne core
    
    b loop
    
.thumb_func
fifo_stat:
    ldr r0, =0xd0000050
    ldr r1, [r0]
    mov r2, #15
    and r1, r1, r2
    mov pc, r7

.thumb_func
fifo_writ:
    ldr r0, =0xd0000050
    ldr r3, [r0]
    mov r2, #2
    and r3, r3, r2
    beq fifo_writ
    
    ldr r0, =0xd0000054
    str r1, [r0]
    sev
    mov pc, r7

.thumb_func
fifo_read:
    ldr r0, =0xd0000050
    ldr r3, [r0]
    mov r2, #1
    and r3, r3, r2
    beq _wfe

    ldr r0, =0xd0000058
    ldr r1, [r0]
    mov pc, r7

.thumb_func
fifo_drain:
    ldr r0, =0xd0000058
    ldr r1, [r0]
    ldr r0, =0xd0000050
    ldr r1, [r0]
    mov r2, #1
    and r1, r1, r2
    bne fifo_drain
    sev
    mov pc, r7
    
.thumb_func
_wfe:
    wfe
    b fifo_read

.thumb_func
led:
    movs r1, #32            @io_bank
    ldr r0, =0x4000f000
    str r1, [r0]            @release reset on io_bank

    movs r1, #5             @sio
    ldr r0, =0x400140cc
    str r1, [r0]            @assign sio to gpio25_ctrl

    movs r1, #1
    lsl r1, r1, #25
    
    ldr r0, =0xd0000024
    str r1, [r0]            @enable output

    ldr r0, =0xd0000014
    str r1, [r0]            @turn on the led
    
.thumb_func
loop:
    nop
    b loop

CodePudding user response:

My core zero code is a mixture of C and assembly language. I think we can sort your questions out though.

My bootstrap looks like this

;@ Common start code for both cores: read the CPUID word from the SIO block,
;@ send core zero to zero_entry and any other core to notmain.
.cpu cortex-m0
.thumb

    ldr r1,=0xD0000000 ;@SIO_CPUID
    ldr r0,[r1]
    cmp r0,#0           ;@ zero means this is core zero
    bne core_one

    ;@ core_zero
    ;@ core zero gets its own stack before calling into C
    ldr r0,=0x20002000
    mov sp,r0
    bl zero_entry
    b .                 ;@ park here if zero_entry returns

core_one:
    ;@ core_one
    ;@ no stack setup here: the launch sequence sent by zero_entry hands
    ;@ core one its stack pointer (0x20003000)
    bl notmain
    b .                 ;@ park here if notmain returns

.align
.ltorg


;@ ----------------------------------
.balign 0x100

;@ void PUT32(unsigned int, unsigned int)
;@ Stores the word in r1 to the address in r0.
.thumb_func
.globl PUT32
PUT32:
    str r1,[r0]
    bx lr

;@ unsigned int GET32(unsigned int)
;@ Loads the word at the address in r0 and returns it in r0.
.thumb_func
.globl GET32
GET32:
    ldr r0,[r0]
    bx lr

;@ void SEV(void)
;@ C-callable wrapper around the sev instruction.
.globl SEV
.thumb_func
SEV:
    sev
    bx lr

;@ void WFE(void)
;@ C-callable wrapper around the wfe instruction.
.globl WFE
.thumb_func
WFE:
    wfe
    bx lr

;@ void DELAY(unsigned int)
;@ Busy-wait: counts r0 down to zero, one subtract and one branch per step.
;@ The subtract comes before the test, so a count of 0 wraps around and
;@ gives the longest delay instead of returning at once.
.globl DELAY
.thumb_func
DELAY:
    sub r0,#1
    bne DELAY           ;@ keep going until the subtract leaves zero
    bx lr

And I link for 0x20000000 and build my uf2 file for sram/0x20000000 as the destination for the binary. It depends on circumstances, but you need to know where your code is running.

My core zero code looks like this

extern void PUT32 ( unsigned int, unsigned int );
extern unsigned int GET32 ( unsigned int );

extern void SEV ( void );
extern void WFE ( void );

// Addresses of the inter-core FIFO registers in the SIO block.
// Each register is SIO_BASE plus a byte offset.
#define SIO_BASE                    0xD0000000

#define SIO_FIFO_ST                 (SIO_BASE+0x50)  // status: bit 0 = word waiting to be read, bit 1 = room to write
#define SIO_FIFO_WR                 (SIO_BASE+0x54)  // write a word to the other core
#define SIO_FIFO_RD                 (SIO_BASE+0x58)  // read a word from the other core

// Throw away every word waiting in the inbound FIFO, then signal an event.
static void fifo_flush ( void )
{
    // status bit 0 reads as zero once the FIFO is empty
    while((GET32(SIO_FIFO_ST)&0x1) != 0)
    {
        GET32(SIO_FIFO_RD);
    }
    SEV();
}

// Send one word to the other core and return the word it answers with.
// Blocks until there is room to write, and again until a reply arrives.
static unsigned int fifo_send ( unsigned int cmd )
{
    // status bit 1 reads as one when the FIFO can take another word
    while((GET32(SIO_FIFO_ST)&0x2) == 0)
    {
    }
    PUT32(SIO_FIFO_WR,cmd);
    SEV();

    // status bit 0 reads as zero while nothing has come back yet;
    // sleep on WFE between polls
    while((GET32(SIO_FIFO_ST)&0x1) == 0)
    {
        WFE();
    }
    return(GET32(SIO_FIFO_RD));
}

// Core zero's half of the core one launch: send 0, 0, 1, vector table,
// stack pointer, entry point, each of which core one must echo back.
// A wrong echo restarts the sequence from the top. Returns 0 once the
// whole sequence has been echoed.
unsigned int zero_entry ( void )
{
    for(;;)
    {
        // the FIFO is flushed ahead of each of the two leading zeros
        fifo_flush();
        if(fifo_send(0)!=0) continue;
        fifo_flush();
        if(fifo_send(0)!=0) continue;

        // && stops at the first wrong echo, so nothing further is sent
        // before the loop starts over
        if(fifo_send(1)==1
        && fifo_send(0x20000000)==0x20000000     //vector_table
        && fifo_send(0x20003000)==0x20003000     //stack pointer
        && fifo_send(0x20000001)==0x20000001)    //entry
        {
            return(0);
        }
    }
}

And if interested my core one code looks like this

void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
void DELAY ( unsigned int );

// Register addresses used by the core one LED code.
// RESETS and IO_BANK0 names are built as base + register offset + alias
// offset, where the _RW/_XOR/_SET/_CLR aliases sit at 0x0000/0x1000/0x2000/
// 0x3000. The SIO GPIO names are plain base + register offset.
#define RESETS_BASE                 0x4000C000

#define RESETS_RESET_RW             (RESETS_BASE+0x0+0x0000)
#define RESETS_RESET_XOR            (RESETS_BASE+0x0+0x1000)
#define RESETS_RESET_SET            (RESETS_BASE+0x0+0x2000)
#define RESETS_RESET_CLR            (RESETS_BASE+0x0+0x3000)

#define RESETS_WDSEL_RW             (RESETS_BASE+0x4+0x0000)
#define RESETS_WDSEL_XOR            (RESETS_BASE+0x4+0x1000)
#define RESETS_WDSEL_SET            (RESETS_BASE+0x4+0x2000)
#define RESETS_WDSEL_CLR            (RESETS_BASE+0x4+0x3000)

#define RESETS_RESET_DONE_RW        (RESETS_BASE+0x8+0x0000)
#define RESETS_RESET_DONE_XOR       (RESETS_BASE+0x8+0x1000)
#define RESETS_RESET_DONE_SET       (RESETS_BASE+0x8+0x2000)
#define RESETS_RESET_DONE_CLR       (RESETS_BASE+0x8+0x3000)

#define SIO_BASE                    0xD0000000

#define SIO_GPIO_OUT_RW             (SIO_BASE+0x10)
#define SIO_GPIO_OUT_SET            (SIO_BASE+0x14)
#define SIO_GPIO_OUT_CLR            (SIO_BASE+0x18)
#define SIO_GPIO_OUT_XOR            (SIO_BASE+0x1C)

#define SIO_GPIO_OE_RW              (SIO_BASE+0x20)
#define SIO_GPIO_OE_SET             (SIO_BASE+0x24)
#define SIO_GPIO_OE_CLR             (SIO_BASE+0x28)
#define SIO_GPIO_OE_XOR             (SIO_BASE+0x2C)

#define IO_BANK0_BASE               0x40014000

#define IO_BANK0_GPIO25_STATUS_RW   (IO_BANK0_BASE+0x0C8+0x0000)
#define IO_BANK0_GPIO25_STATUS_XOR  (IO_BANK0_BASE+0x0C8+0x1000)
#define IO_BANK0_GPIO25_STATUS_SET  (IO_BANK0_BASE+0x0C8+0x2000)
#define IO_BANK0_GPIO25_STATUS_CLR  (IO_BANK0_BASE+0x0C8+0x3000)

#define IO_BANK0_GPIO25_CTRL_RW     (IO_BANK0_BASE+0x0CC+0x0000)
#define IO_BANK0_GPIO25_CTRL_XOR    (IO_BANK0_BASE+0x0CC+0x1000)
#define IO_BANK0_GPIO25_CTRL_SET    (IO_BANK0_BASE+0x0CC+0x2000)
#define IO_BANK0_GPIO25_CTRL_CLR    (IO_BANK0_BASE+0x0CC+0x3000)

// Core one's program: bring IO_BANK0 out of reset, hand GPIO 25 to the SIO
// as an output, then blink it forever. Never returns in practice.
int notmain ( void )
{
    const unsigned int io_bank0_bit = 1<<5;   //IO_BANK0 in the RESETS registers
    const unsigned int led_bit = 1<<25;       //pin 25

    //release reset on IO_BANK0 and wait until the block reports done
    PUT32(RESETS_RESET_CLR,io_bank0_bit);
    while((GET32(RESETS_RESET_DONE_RW)&io_bank0_bit)==0)
    {
    }

    //start from a known state: output disabled, pin low
    PUT32(SIO_GPIO_OE_CLR,led_bit);
    PUT32(SIO_GPIO_OUT_CLR,led_bit);

    //function select 5 is SIO (software controlled I/O)
    PUT32(IO_BANK0_GPIO25_CTRL_RW,5);

    //output enable
    PUT32(SIO_GPIO_OE_SET,led_bit);

    for(;;)
    {
        //led on, wait, led off, wait
        PUT32(SIO_GPIO_OUT_SET,led_bit);
        DELAY(0x100000);
        PUT32(SIO_GPIO_OUT_CLR,led_bit);
        DELAY(0x100000);
    }
    return(0);
}

What does the thumb bit mean? Look at the bx instruction and the related information in the ARM documentation (the armv6-m architectural reference manual). This goes back to the full sized cores that can run both arm and thumb code. Since instructions in both modes are aligned, the lsbit of an instruction address is otherwise always zero, so they chose to use the lsbit in branch-by-address instructions to determine the mode to use at the branch destination (originally only the bx instruction, but later pop and others). If the lsbit is set then it is branching to a thumb instruction; if it is clear then it is branching to an arm instruction.

The cortex-ms they chose to go with a vector table (makes sense based on the target market for the product) instead of hardcoded addresses like the prior full sized cores (ARM7, ARM9, ARM10, ARM11). As documented in the architectural reference manual the first word is a value to put in the stack pointer to save that step in the boot process and the second is the reset vector.

Now ARM chose to make it such that you had to put a thumb function pointer address in there meaning the lsbit is ORRed with one. I emphasize ORRed with one and not ADD one, because if you use your tools properly (IMO) then the tool will set the lsbit and ADDing one you will then break it.

Letting the tools do the work

;@ Cortex-M vector table laid out by hand, 16 words.
;@ Word 0 is the value loaded into the stack pointer, word 1 is the reset
;@ vector, and every remaining entry points at hang.
;@ The handler words are plain label references; the labels themselves are
;@ marked with .thumb_func where they are defined.
.cpu cortex-m0
.thumb

.thumb_func
.global _start
_start:
.word 0x20001000
.word reset
.word hang
.word hang

.word hang
.word hang
.word hang
.word hang

.word hang
.word hang
.word hang
.word hang

.word hang
.word hang
.word hang
.word hang

;@ reset: calls notmain, then parks in hang if it ever returns.
;@ hang: branches to itself forever.
;@ Each label is preceded by .thumb_func so that references to it carry the
;@ thumb bit.
.thumb_func
reset:
    bl notmain
    b hang
.thumb_func
hang:   b .

(This does not work on a pico; it is only here to show what the thumb bit means.)

.thumb_func causes the next label it finds in the code to be a thumb function address not just a plain old address.

So this gives

00200000 <_start>:
  200000:   20001000    andcs   r1, r0, r0
  200004:   00200041    eoreq   r0, r0, r1, asr #32
  200008:   00200047    eoreq   r0, r0, r7, asr #32
  20000c:   00200047    eoreq   r0, r0, r7, asr #32
  200010:   00200047    eoreq   r0, r0, r7, asr #32
  200014:   00200047    eoreq   r0, r0, r7, asr #32
  200018:   00200047    eoreq   r0, r0, r7, asr #32
  20001c:   00200047    eoreq   r0, r0, r7, asr #32
  200020:   00200047    eoreq   r0, r0, r7, asr #32
  200024:   00200047    eoreq   r0, r0, r7, asr #32
  200028:   00200047    eoreq   r0, r0, r7, asr #32
  20002c:   00200047    eoreq   r0, r0, r7, asr #32
  200030:   00200047    eoreq   r0, r0, r7, asr #32
  200034:   00200047    eoreq   r0, r0, r7, asr #32
  200038:   00200047    eoreq   r0, r0, r7, asr #32
  20003c:   00200047    eoreq   r0, r0, r7, asr #32

00200040 <reset>:
  200040:   f000 f81a   bl  200078 <notmain>
  200044:   e7ff        b.n 200046 <hang>

00200046 <hang>:
  200046:   e7fe        b.n 200046 <hang>

Built and linked for a different mcu, not the pico. reset is at 0x00200040 and hang at 0x00200046. The tools have done the work for us: because we used .thumb_func, they put each address orred with one into the table (0x00200041 and 0x00200047).

And everything is happy and this mcu will boot, or at least it won't hang right after reset.

The longer way to do this, there is no .arm_func so for ARM and thumb you can instead do

.type reset,%function
reset:

It does not have to be immediately before the label, but you have to do the extra work to type in the label name.

If I take your code and change it like this:

    ldr r1, =one_entry
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read

.thumb_func
one_entry:
    ldr r0, =0xd0000000
    ldr r1, [r0]
    cmp r1, #1
    beq led

Then I get

2000005a:   4907        ldr r1, [pc, #28]   ; (20000078 <one_entry+0x14>)
2000005c:   467f        mov r7, pc
2000005e:   e011        b.n 20000084 <fifo_writ>
20000060:   467f        mov r7, pc
20000062:   e00e        b.n 20000082 <fifo_read>

20000064 <one_entry>:
20000064:   4805        ldr r0, [pc, #20]   ; (2000007c <one_entry+0x18>)
20000066:   6801        ldr r1, [r0, #0]
20000068:   2901        cmp r1, #1
2000006a:   d00c        beq.n   20000086 <led>
2000006c:   e7fe        b.n 2000006c <one_entry+0x8>
2000006e:   46c0        nop         ; (mov r8, r8)
20000070:   20001000    andcs   r1, r0, r0
20000074:   e000ed08    and lr, r0, r8, lsl #26
20000078:   20000065    andcs   r0, r0, r5, rrx
2000007c:   d0000000    andle   r0, r0, r0

The tool has created the address to the entry point for core one with the lsbit set. 20000065

Now the next problem you have is

mov r1, sp              @stack pointer

You are taking core zero's stack pointer value at this point in core zero's execution and setting that as the stack pointer for core one. If you end core zero in an infinite loop after starting core one, then this can work. But if you want to keep doing things with core zero, you need to give core one its own stack pointer. In my example you can see that I give core zero 0x20002000 and core one 0x20003000. Sharing one stack would have been very painful to debug: core one would start, but you would have random chaos that changes every time you change the code.

And to your VTOR problem. I also tried just reading the VTOR and it did not work. Originally my code had a special vector table:

.globl vector_table
vector_table:
    b reset
    .balign 4
    .word reset ;@ has to be offset 4
    .word loop
    .word loop
    .word loop

And I set the vector table, instead of read it

ldr r1,=0xE000ED08 ;@ VTOR
ldr r0,=vector_table
str r0,[r1]

For core zero which is probably borrowed from other pico code I wrote that might have actually used the table. The b reset because we don't actually get to use the reset vector for core zero so this was my kludge. Could have done alignment stuff and put the vector table somewhere else in memory (and yes for both cores I set the stack pointer myself, initially, but for the above example assumed that core one was doing it itself).

And used that same address vector_table for core one. In this case I could have then read it and it would have worked. You have only provided a fraction so we do not know what you did with the VTOR for core zero before this code, but I assume you did not set it, since your code is not working.

You/we are not using a vector table in these examples so just need to make it happy, so I just forced 0x20000000 and it then worked.

I believe you need to fix all three addresses, the vector table, the entry point, and the stack pointer in order to have success.


From your rewrite, I made these modifications.

    .cpu cortex-m0
    .thumb
ent:
    ldr r0, =0x20001000
    mov sp, r0              @init stack pointer

    ldr r0, =0xe000ed08
    ldr r1, =0x20000000
    str r1, [r0]            @init vtor

    ldr r0, =0xd0000000
    ldr r1, [r0]
    cmp r1, #1
    beq led

    b core

.thumb_func
core:
    mov r7, pc
    b fifo_drain
    mov r1, #0
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #0
    bne core

    mov r7, pc
    b fifo_drain
    mov r1, #0
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #0
    bne core

    mov r1, #1
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, #1
    bne core

    ldr r4, =0x20000000
    mov r1, r4              @vector table
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, r4
    bne core

    mov r4, sp              @stack pointer
    mov r1, r4
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, r4
    bne core

    ldr r4, =0x20000001
    mov r1, r4              @entry point
    mov r7, pc
    b fifo_writ
    mov r7, pc
    b fifo_read
    cmp r1, r4
    bne core

    b loop

.thumb_func
fifo_stat:
    ldr r0, =0xd0000050
    ldr r1, [r0]
    mov r2, #15
    and r1, r1, r2
    mov pc, r7

.thumb_func
fifo_writ:
    ldr r0, =0xd0000050
    ldr r3, [r0]
    mov r2, #2
    and r3, r3, r2
    beq fifo_writ

    ldr r0, =0xd0000054
    str r1, [r0]
    sev
    mov pc, r7

.thumb_func
fifo_read:
    ldr r0, =0xd0000050
    ldr r3, [r0]
    mov r2, #1
    and r3, r3, r2
    beq _wfe

    ldr r0, =0xd0000058
    ldr r1, [r0]
    mov pc, r7

.thumb_func
fifo_drain:
    ldr r0, =0xd0000058
    ldr r1, [r0]
    ldr r0, =0xd0000050
    ldr r1, [r0]
    mov r2, #1
    and r1, r1, r2
    bne fifo_drain
    sev
    mov pc, r7

.thumb_func
_wfe:
    wfe
    b fifo_read

;@ ----------------------------------
.balign 0x100

.thumb_func
led:
    movs r1, #32            @io_bank
    ldr r0, =0x4000f000
    str r1, [r0]            @release reset on io_bank

    movs r1, #5             @sio
    ldr r0, =0x400140cc
    str r1, [r0]            @assign sio to gpio25_ctrl

    movs r1, #1
    lsl r1, r1, #25

    ldr r0, =0xd0000024
    str r1, [r0]            @enable output

    ldr r0, =0xd0000014
    str r1, [r0]            @turn on the led

.thumb_func
loop:
    nop
    b loop

First in a couple of places you used r3 to save the value you wanted to compare against after writing and reading back. But r3 is used both in the write and read so its contents are lost.

Second, the program was larger than 0x100 bytes. There is something strange about that boundary (I would have to re-research how I originally figured it out), so I padded with .balign 0x100 ahead of led to avoid the boundary, and then it worked.

As used above sp did not need to go to r4, but I did it to shotgun the problem.

If I remove the items that are not needed (the write to VTOR and the b core up front) and use bl and bx lr to call and return, this saves enough instructions to make the binary less than 0x100 bytes, and it can then be used without putting that boundary in.

    @ Start of the image; both cores come through here.
    @ Sets the stack pointer, then reads the CPUID word from the SIO block:
    @ a value of 1 goes to led, anything else falls through into core.
    @ NOTE(review): core 1 arrives here because core sends 0x20000001 as
    @ the entry point, which assumes this label is at 0x20000000 - confirm
    @ against the link address.
    .cpu cortex-m0
    .thumb
ent:
    ldr r0, =0x20001000
    mov sp, r0              @init stack pointer

    ldr r0, =0xd0000000
    ldr r1, [r0]
    cmp r1, #1              @ 1 = running on core 1
    beq led

@ Core 0 side of the core 1 launch.
@ Sends {0, 0, 1, vector table, stack pointer, entry point} through the FIFO.
@ Core 1 must echo each word back unchanged; any mismatch restarts the whole
@ sequence from the top. Ends by branching to loop.
@
@ Every helper is reached with bl so that its `bx lr` comes back to the next
@ instruction here. A plain `b` would leave a stale return address in lr.
@ Values that must survive a helper call live in r4, because fifo_writ and
@ fifo_read overwrite r0, r2 and r3 and hand their result back in r1.
core:
    bl fifo_drain
    mov r1, #0
    bl fifo_writ
    bl fifo_read
    cmp r1, #0
    bne core

    bl fifo_drain           @ second leading zero is also sent after a drain
    mov r1, #0
    bl fifo_writ
    bl fifo_read
    cmp r1, #0
    bne core

    mov r1, #1
    bl fifo_writ
    bl fifo_read
    cmp r1, #1
    bne core

    ldr r4, =0x20000000
    mov r1, r4              @vector table
    bl fifo_writ
    bl fifo_read
    cmp r1, r4
    bne core

    mov r1, sp              @stack pointer
    bl fifo_writ
    bl fifo_read
    cmp r1, sp              @ sp is not touched by the helpers
    bne core

    ldr r4, =0x20000001     @ lsbit set: thumb entry address
    mov r1, r4              @entry point
    bl fifo_writ
    bl fifo_read
    cmp r1, r4
    bne core

    b loop

@ Reads the FIFO status word at 0xd0000050 and returns its low four bits.
@ Out: r1 = status & 15. Overwrites r0 and r2.
@ Nothing in this listing calls it.
fifo_stat:
    ldr r0, =0xd0000050
    ldr r1, [r0]
    mov r2, #15
    and r1, r1, r2
    bx lr

@ Writes one word to the other core.
@ In:  r1 = word to send (left unchanged).
@ Overwrites r0, r2, r3.
@ Spins on bit 1 of the status word at 0xd0000050 until it is set, stores r1
@ to 0xd0000054, then issues sev.
fifo_writ:
    ldr r0, =0xd0000050
    ldr r3, [r0]
    mov r2, #2
    and r3, r3, r2
    beq fifo_writ           @ bit 1 clear: no room yet, poll again

    ldr r0, =0xd0000054
    str r1, [r0]
    sev
    bx lr

@ Reads one word from the other core.
@ Out: r1 = word read from 0xd0000058.
@ Overwrites r0, r2, r3.
@ While bit 0 of the status word at 0xd0000050 is clear, control goes to
@ _wfe, which waits for an event and comes back to the top of this routine.
fifo_read:
    ldr r0, =0xd0000050
    ldr r3, [r0]
    mov r2, #1
    and r3, r3, r2
    beq _wfe                @ bit 0 clear: nothing to read yet

    ldr r0, =0xd0000058
    ldr r1, [r0]
    bx lr

@ Empties the inbound FIFO, then issues sev.
@ The status word is checked before every pop, so the read port is never
@ touched while the FIFO is empty (the C fifo_flush does the same).
@ Out: r0 = 0xd0000050, r1 = 0, r2 = 1, Z flag set - the same state the
@ routine left behind before this reordering.
fifo_drain:
    ldr r0, =0xd0000050     @ status word
1:
    ldr r1, [r0]
    mov r2, #1
    and r1, r1, r2          @ bit 0: a word is waiting
    beq 2f                  @ empty: finished
    ldr r1, [r0, #8]        @ 0xd0000058: pop one word and discard it
    b 1b
2:
    sev
    bx lr

@ Wait path of fifo_read: sleep until an event, then poll the FIFO again.
@ This has to be a plain branch. fifo_read returns with `bx lr`, and lr must
@ still hold the address fifo_read's caller put there; a `bl` here would
@ replace it, and fifo_read would return to the instruction after this one
@ (the start of led) instead of to its caller.
_wfe:
    wfe
    b fifo_read

@ Turns on the LED on GPIO 25. Reached from ent when the CPUID word is 1.
@ Does not return: execution falls through into loop below.
@ Overwrites r0 and r1.
@ NOTE(review): the C notmain in this document polls RESET_DONE after
@ releasing the IO_BANK0 reset; this routine writes the pin control register
@ straight away.
led:
    movs r1, #32            @io_bank
    ldr r0, =0x4000f000
    str r1, [r0]            @release reset on io_bank

    movs r1, #5             @sio
    ldr r0, =0x400140cc
    str r1, [r0]            @assign sio to gpio25_ctrl

    movs r1, #1
    lsl r1, r1, #25         @ r1 = 1<<25, the bit for pin 25

    ldr r0, =0xd0000024
    str r1, [r0]            @enable output

    ldr r0, =0xd0000014
    str r1, [r0]            @turn on the led

@ Idle forever. Core 0 ends up here after the launch sequence, and led
@ falls through into it.
loop:
    nop
    b loop

Note the instruction set allows for things like this:

@ Compact form of fifo_drain: one base address in r0, with the read port
@ reached through an immediate offset instead of a second literal load.
@ Same order of accesses as the longer version: pop, then test status bit 0.
fifo_drain:
    ldr r0, =0xd0000050
    ldr r1, [r0,#8] @0xd0000058
    ldr r1, [r0] @0xd0000050
    mov r2, #1
    and r1, r1, r2
    bne fifo_drain  @ bit 0 still set: more to pop
    sev
    bx lr

not as brute force and simple to read, but saves instructions.

For someone just learning ARM assembly language, and I presume the rp2040 at the same time, I am quite impressed; keep up the excellent work. This particular mcu is very very cool, but also poorly documented. The ARM instruction set is well documented, but you have to deal with ARM vs thumb and then unified syntax vs not (fortunately you did not hit the difference). And then there is this 0x100 byte thing. I cannot remember how I figured it out; I think I looked at their code and worked it out from that, but I would have to re-research the whole thing. If you want to confirm this for yourself, take a version that is just under 0x100 bytes and then add some nops in the body somewhere to stretch it past 0x100 bytes. Note that with the simple changes described, and removing unused/unneeded code, I got yours down to

216 bytes read (0xD8)

216 bytes...

bottom line.

You had the right idea on the three parameters but they needed some work. And then a simple oops on using a register outside a function call that got used within a function call. Then the crazy 0x100 byte thing. This is the thing with bare-metal, hard to debug, gotta grind your way through, do not give up.

The mov r7,pc thing, I am actually impressed about, not critical of - a lot of folks would struggle with the two instructions ahead thing.

  • Related