I took a quick look at the VirtualBoy's "libgccvb" source code, and was surprised to see so many uses of "u8" and "u16" in the code.
The V810 CPU was designed to handle 32-bit variables ... and it doesn't do any arithmetic operations on 16-bit or 8-bit values.
That means that the compiler needs to do a lot of masking/sign-extending when it's asked to deal with 16-bit or 8-bit variables, just so that the results stay correct within the wrap-around (truncation) limits of a 16-bit or 8-bit type.
You really should be using "int" and "unsigned" as much as possible, and avoid "short" and "char" variables.
I thought that it would be interesting to see how the different GCC compiler versions compile a couple of simple C functions.
In each case, the original libgccvb-style version comes first, followed by one or two versions that replace the "u16" and "u8" variables with "unsigned".
It seems strange to me that GCC 4.4.2 is doing such a relatively poor job compared to GCC 2.9.5 or GCC 4.7.4. I wonder what went wrong?
All examples are compiled with "-O2 -fomit-frame-pointer".
****************************************************************************************
****************************************************************************************
/*
 * copymem - copy `num` bytes from `src` to `dest`, one byte per iteration.
 *
 * dest: destination buffer; written through, then advanced (local copy of
 *       the pointer only).
 * src:  source buffer; read through, then advanced.
 * num:  number of bytes to copy, declared as u16.
 *
 * Copies strictly forward and performs no overlap check.
 *
 * NOTE(review): u8/u16 are not defined in this file; presumably libgccvb's
 * unsigned 8-bit / 16-bit typedefs -- confirm.
 * This is the baseline variant: both the count and the loop counter are u16,
 * which is what the `andi 65535` masking in the listings below corresponds to.
 */
void copymem (u8* dest, const u8* src, u16 num)
{
/* 16-bit loop counter (the narrow type under test). */
u16 i;
for (i = 0; i < num; i++) {
/* Byte copy with post-increment of both pointers. */
*dest++ = *src++;
}
}
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
_copymem: andi 65535,r8,r8    _copymem: andi 65535,r8,r8    _copymem: andi 65535,r8,r8
          be .L1                        mov 0,r10                     be .L4
          addi -1,r8,r11                cmp r8,r10                    mov 0,r10
          andi 65535,r11,r11            bnl .L4             .L3:      mov r7,r11
          add 1,r11           .L6:      add 1,r10                     add r10,r11
          add r6,r11                    ld.b 0[r7],r11                ld.b 0[r11],r12
.L3:      ld.b 0[r7],r10                andi 65535,r10,r10            mov r6,r11
          add 1,r7                      add 1,r7                      add r10,r11
          st.b r10,0[r6]                st.b r11,0[r6]                add 1,r10
          add 1,r6                      add 1,r6                      st.b r12,0[r11]
          cmp r11,r6                    cmp r8,r10                    andi 65535,r10,r11
          bne .L3                       bl .L6                        cmp r11,r8
.L1:      jmp [r31]           .L4:      jmp [r31]                     bh .L3
                                                            .L4:      jmp [r31]
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
****************************************************************************************
****************************************************************************************
/*
 * copymem2 - copy `num` bytes from `src` to `dest`, one byte per iteration.
 *
 * dest: destination buffer; written through, then advanced (local copy of
 *       the pointer only).
 * src:  source buffer; read through, then advanced.
 * num:  number of bytes to copy, declared as plain `unsigned`.
 *
 * Copies strictly forward and performs no overlap check.
 *
 * Same loop body as the u16-counted variant; only the type of the count and
 * of the loop counter differs (`unsigned` instead of u16).
 *
 * NOTE(review): u8 is not defined in this file; presumably libgccvb's
 * unsigned 8-bit typedef -- confirm.
 */
void copymem2 (u8* dest, const u8* src, unsigned num)
{
/* Native-width loop counter. */
unsigned i;
for (i = 0; i < num; i++) {
/* Byte copy with post-increment of both pointers. */
*dest++ = *src++;
}
}
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
_copymem2:mov r6,r11          _copymem2:mov 0,r11           _copymem2:cmp r0,r8
          add r8,r11                    cmp r8,r11                    be .L10
          cmp 0,r8                      bnl .L10                      mov 0,r10
          be .L7              .L12:     ld.b 0[r7],r10      .L9:      mov r7,r11
.L11:     ld.b 0[r7],r10                add 1,r11                     add r10,r11
          add 1,r7                      add 1,r7                      ld.b 0[r11],r12
          st.b r10,0[r6]                st.b r10,0[r6]                mov r6,r11
          add 1,r6                      add 1,r6                      add r10,r11
          cmp r11,r6                    cmp r8,r11                    st.b r12,0[r11]
          bne .L11                      bl .L12                       add 1,r10
.L7:      jmp [r31]           .L10:     jmp [r31]                     cmp r10,r8
                                                                      bh .L9
                                                            .L10:     jmp [r31]
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
****************************************************************************************
****************************************************************************************
/*
 * addmem - for each of `num` source bytes, store (byte + offset) to `dest`.
 *
 * dest:   destination buffer; written through, then advanced (local copy of
 *         the pointer only).
 * src:    source buffer; read through, then advanced.
 * num:    number of bytes to process, declared as u16.
 * offset: value added to every source byte, declared as u8.
 *
 * The sum is stored through a u8 pointer, so it is narrowed to u8 on the
 * store. Processes strictly forward and performs no overlap check.
 *
 * NOTE(review): u8/u16 are not defined in this file; presumably libgccvb's
 * unsigned 8-bit / 16-bit typedefs -- confirm.
 * This is the baseline variant: count and loop counter are u16 and the
 * offset is u8, matching the `andi 65535` / `andi 255` masking in the
 * listings below.
 */
void addmem (u8* dest, const u8* src, u16 num, u8 offset)
{
/* 16-bit loop counter (the narrow type under test). */
u16 i;
for (i = 0; i < num; i++) {
/* Add the offset to the source byte; the store narrows the sum to u8. */
*dest++ = (*src++ + offset);
}
}
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
_addmem:  andi 65535,r8,r8    _addmem:  andi 65535,r8,r8    _addmem:  andi 65535,r8,r8
          andi 255,r9,r9                mov 0,r11                     andi 255,r9,r9
          cmp 0,r8                      andi 255,r9,r9                cmp r0,r8
          be .L13                       cmp r8,r11                    be .L20
          addi -1,r8,r11                bnl .L22                      mov 0,r10
          andi 65535,r11,r11  .L24:     mov r9,r10          .L19:     mov r7,r11
          add 1,r11                     add 1,r11                     add r10,r11
          add r6,r11                    ld.b 0[r7],r12                ld.b 0[r11],r12
.L15:     ld.b 0[r7],r10                andi 65535,r11,r11            mov r6,r11
          add 1,r7                      add r12,r10                   add r10,r11
          add r9,r10                    add 1,r7                      add r9,r12
          st.b r10,0[r6]                st.b r10,0[r6]                add 1,r10
          add 1,r6                      add 1,r6                      st.b r12,0[r11]
          cmp r11,r6                    cmp r8,r11                    andi 65535,r10,r11
          bne .L15                      bl .L24                       cmp r11,r8
.L13:     jmp [r31]           .L22:     jmp [r31]                     bh .L19
                                                            .L20:     jmp [r31]
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
****************************************************************************************
****************************************************************************************
/*
 * addmem2 - for each of `num` source bytes, store (byte + offset) to `dest`.
 *
 * dest:   destination buffer; written through, then advanced (local copy of
 *         the pointer only).
 * src:    source buffer; read through, then advanced.
 * num:    number of bytes to process, declared as plain `unsigned`.
 * offset: value added to every source byte, still declared as u8.
 *
 * The sum is stored through a u8 pointer, so it is narrowed to u8 on the
 * store. Processes strictly forward and performs no overlap check.
 *
 * Intermediate variant: the count and loop counter are `unsigned`, but the
 * offset parameter keeps its u8 type.
 *
 * NOTE(review): u8 is not defined in this file; presumably libgccvb's
 * unsigned 8-bit typedef -- confirm.
 */
void addmem2 (u8* dest, const u8* src, unsigned num, u8 offset)
{
/* Native-width loop counter. */
unsigned i;
for (i = 0; i < num; i++) {
/* Add the offset to the source byte; the store narrows the sum to u8. */
*dest++ = (*src++ + offset);
}
}
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
_addmem2: mov r6,r11          _addmem2: mov 0,r12           _addmem2: andi 255,r9,r9
          andi 255,r9,r9                andi 255,r9,r9                cmp r0,r8
          add r8,r11                    cmp r8,r12                    be .L20
          cmp 0,r8                      bnl .L22                      mov 0,r10
          be .L18             .L24:     mov r9,r10          .L19:     mov r7,r11
.L22:     ld.b 0[r7],r10                ld.b 0[r7],r11                add r10,r11
          add 1,r7                      add 1,r12                     ld.b 0[r11],r12
          add r9,r10                    add r11,r10                   mov r6,r11
          st.b r10,0[r6]                add 1,r7                      add r10,r11
          add 1,r6                      st.b r10,0[r6]                add r9,r12
          cmp r11,r6                    add 1,r6                      st.b r12,0[r11]
          bne .L22                      cmp r8,r12                    add 1,r10
.L18:     jmp [r31]                     bl .L24                       cmp r10,r8
                              .L22:     jmp [r31]                     bh .L19
                                                            .L20:     jmp [r31]
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
****************************************************************************************
****************************************************************************************
/*
 * addmem3 - for each of `num` source bytes, store (byte + offset) to `dest`.
 *
 * dest:   destination buffer; written through, then advanced (local copy of
 *         the pointer only).
 * src:    source buffer; read through, then advanced.
 * num:    number of bytes to process, declared as plain `unsigned`.
 * offset: value added to every source byte, declared as plain `unsigned`.
 *
 * The sum is stored through a u8 pointer, so it is narrowed to u8 on the
 * store. Processes strictly forward and performs no overlap check.
 *
 * Fully widened variant: count, loop counter and offset are all `unsigned`;
 * only the buffer element type remains u8.
 *
 * NOTE(review): u8 is not defined in this file; presumably libgccvb's
 * unsigned 8-bit typedef -- confirm.
 */
void addmem3 (u8* dest, const u8* src, unsigned num, unsigned offset)
{
/* Native-width loop counter. */
unsigned i;
for (i = 0; i < num; i++) {
/* Add the offset to the source byte; the store narrows the sum to u8. */
*dest++ = (*src++ + offset);
}
}
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
_addmem3: cmp 0,r8            _addmem3: mov 0,r12           _addmem3: cmp r0,r8
          be .L24                       cmp r8,r12                    be .L25
          andi 255,r9,r9                bnl .L28                      andi 255,r9,r9
          add r6,r8           .L30:     mov r9,r10                    mov 0,r10
.L26:     ld.b 0[r7],r10                ld.b 0[r7],r11      .L24:     mov r7,r11
          add 1,r7                      add 1,r12                     add r10,r11
          add r9,r10                    add r11,r10                   ld.b 0[r11],r12
          st.b r10,0[r6]                add 1,r7                      mov r6,r11
          add 1,r6                      st.b r10,0[r6]                add r10,r11
          cmp r8,r6                     add 1,r6                      add r9,r12
          bne .L26                      cmp r8,r12                    st.b r12,0[r11]
.L24:     jmp [r31]                     bl .L30                       add 1,r10
                              .L28:     jmp [r31]                     cmp r10,r8
                                                                      bh .L24
                                                            .L25:     jmp [r31]
********* GCC 4.7.4 ******************* GCC 2.9.5 ******************* GCC 4.4.2 ********
****************************************************************************************
****************************************************************************************