#include <stdio.h>
#include <time.h>
/*
 * Floating-point add workload used by the FPU test.
 *
 * Makes 1024 passes over the first 1024 elements of both arrays and adds
 * them pairwise in single precision.  Every sum overwrites the previous
 * one, so the value returned is the last sum computed, i.e.
 * src_mem_32[1023] + dst_mem_32[1023].  Neither array is written to.
 *
 * Both arrays must hold at least 1024 floats.
 */
float calc(float *src_mem_32, float *dst_mem_32)
{
    float last_sum;
    int pass = 0;

    while (pass < 1024) {
        int idx = 0;

        while (idx < 1024) {
            last_sum = src_mem_32[idx] + dst_mem_32[idx];
            idx++;
        }
        pass++;
    }

    return last_sum;
}
/*
 * FPU test entry point.
 *
 * Samples lib_clock(), runs the 1024 x 1024 float-add workload once inline
 * and once more through calc(), then prints the difference between the two
 * lib_clock() samples.  The output labels the value "ms"; the actual unit of
 * lib_clock() is not visible in this file.
 *
 * argc and argv are unused.  Always returns 0.
 */
int main (int argc, char **argv)
{
    /* Only element 0 gets the listed value; the rest are zero-initialized. */
    float src_mem_32[1024] = {1.024};
    float dst_mem_32[1024] = {0.933};
    float sum;
    int pass, idx;
    ULONG clockTick;
    ULONG startTick = lib_clock();

    /* Inline copy of the workload, inside the timed region. */
    pass = 0;
    while (pass < 1024) {
        idx = 0;
        while (idx < 1024) {
            sum = src_mem_32[idx] + dst_mem_32[idx];
            idx++;
        }
        pass++;
    }

    /* Same workload again, this time through a function call. */
    sum = calc(src_mem_32, dst_mem_32);

    clockTick = lib_clock() - startTick;
    printf("clockTick = %u ms\r\n", (unsigned int)clockTick);

    return 0;
}
通过一个计算浮点数的程序来测试浮点配置的运行情况.
[root@sylixos:/apps/epics]# /apps/fpuTest/fpuTest
clockTick = 102 ms
00000304 <calc>:
304: e92d4800 push {fp, lr}
308: e28db004 add fp, sp, #4
30c: e24dd018 sub sp, sp, #24
310: e50b0018 str r0, [fp, #-24]
314: e50b101c str r1, [fp, #-28]
318: e3a03000 mov r3, #0
31c: e50b3010 str r3, [fp, #-16]
320: ea00001a b 390 <calc+0x8c>
324: e3a03000 mov r3, #0
328: e50b300c str r3, [fp, #-12]
32c: ea000011 b 378 <calc+0x74>
330: e51b300c ldr r3, [fp, #-12]
334: e1a03103 lsl r3, r3, #2
338: e51b2018 ldr r2, [fp, #-24]
33c: e0823003 add r3, r2, r3
340: e5931000 ldr r1, [r3]
344: e51b300c ldr r3, [fp, #-12]
348: e1a03103 lsl r3, r3, #2
34c: e51b201c ldr r2, [fp, #-28]
350: e0823003 add r3, r2, r3
354: e5933000 ldr r3, [r3]
358: e1a00001 mov r0, r1
35c: e1a01003 mov r1, r3
360: eb000078 bl 548 <__addsf3>
364: e1a03000 mov r3, r0
368: e50b3008 str r3, [fp, #-8]
36c: e51b300c ldr r3, [fp, #-12]
370: e2833001 add r3, r3, #1
374: e50b300c str r3, [fp, #-12]
378: e51b300c ldr r3, [fp, #-12]
37c: e3530b01 cmp r3, #1024 ; 0x400
380: baffffea blt 330 <calc+0x2c>
384: e51b3010 ldr r3, [fp, #-16]
388: e2833001 add r3, r3, #1
38c: e50b3010 str r3, [fp, #-16]
390: e51b3010 ldr r3, [fp, #-16]
394: e3530b01 cmp r3, #1024 ; 0x400
398: baffffe1 blt 324 <calc+0x20>
39c: e51b3008 ldr r3, [fp, #-8]
3a0: e1a00003 mov r0, r3
3a4: e24bd004 sub sp, fp, #4
3a8: e8bd8800 pop {fp, pc}
在上面的反汇编中可以看到对 __addsf3 的调用, 这个函数通过非浮点指令(整数运算)计算出两个浮点数的和并将其返回. 所谓软件浮点, 实际上就是编译器将浮点运算替换为对已经准备好的浮点运算库函数的调用, 因此一般软件浮点的效率较低.
[root@sylixos:/apps/epics]# /apps/fpuTest/fpuTest
clockTick = 71 ms
00000304 <calc>:
304: e52db004 push {fp} ; (str fp, [sp, #-4]!)
308: e28db000 add fp, sp, #0
30c: e24dd01c sub sp, sp, #28
310: e50b0018 str r0, [fp, #-24]
314: e50b101c str r1, [fp, #-28]
318: e3a03000 mov r3, #0
31c: e50b3010 str r3, [fp, #-16]
320: ea000017 b 384 <calc+0x80>
324: e3a03000 mov r3, #0
328: e50b300c str r3, [fp, #-12]
32c: ea00000e b 36c <calc+0x68>
330: e51b300c ldr r3, [fp, #-12]
334: e1a03103 lsl r3, r3, #2
338: e51b2018 ldr r2, [fp, #-24]
33c: e0823003 add r3, r2, r3
340: ed937a00 vldr s14, [r3]
344: e51b300c ldr r3, [fp, #-12]
348: e1a03103 lsl r3, r3, #2
34c: e51b201c ldr r2, [fp, #-28]
350: e0823003 add r3, r2, r3
354: edd37a00 vldr s15, [r3]
358: ee777a27 vadd.f32 s15, s14, s15
35c: ed4b7a02 vstr s15, [fp, #-8]
360: e51b300c ldr r3, [fp, #-12]
364: e2833001 add r3, r3, #1
368: e50b300c str r3, [fp, #-12]
36c: e51b300c ldr r3, [fp, #-12]
370: e3530b01 cmp r3, #1024 ; 0x400
374: baffffed blt 330 <calc+0x2c>
378: e51b3010 ldr r3, [fp, #-16]
37c: e2833001 add r3, r3, #1
380: e50b3010 str r3, [fp, #-16]
384: e51b3010 ldr r3, [fp, #-16]
388: e3530b01 cmp r3, #1024 ; 0x400
38c: baffffe4 blt 324 <calc+0x20>
390: e51b3008 ldr r3, [fp, #-8]
394: e1a00003 mov r0, r3
398: e24bd000 sub sp, fp, #0
39c: e49db004 pop {fp} ; (ldr fp, [sp], #4)
3a0: e12fff1e bx lr
-mfloat-abi=name 用于指定浮点的二进制接口(ABI), 这里的 name 可以设置为 soft、softfp 或 hard.
如果设置为 soft 就是告诉编译器用软浮点来处理所有的浮点运算, 这样设置的结果相当于 FPU_TYPE 为 disable
如果设置为 softfp 告诉编译器通过浮点指令来进行浮点运算, 但是使用软浮点的 calling conventions(这里后面会解释)
如果设置为 hard 告诉编译器通过浮点指令来进行浮点运算, 且使用 FPU 的 calling conventions
在 SylixOS 系统下只要设置了 -mfpu 就会采用默认的配置 FLOAT_ABI := softfp , 如果需要指定其他的 ABI 配置在 base 工程中的 config.mk 添加相应配置即可.
调用约定(Calling Convention)是规定子过程如何获取参数以及如何返回的方案,其通常与架构、编译器等相关。具体来说,调用约定一般规定了
参数、返回值、返回地址等放置的位置(寄存器、栈或存储器等)
如何将调用子过程的准备工作与恢复现场的工作划分到调用者(Caller)与被调用者(Callee)身上
ARM32 架构的栈帧可以通过下图表示
/*
 * Sample caller for the stack-frame discussion: keeps three locals on its
 * own frame, passes the addresses of two of them to swap(), and prints the
 * third afterwards.
 */
void func (void)
{
    int a = 5;
    int x = 10;
    int y = 20;

    /* swap() is defined outside this snippet; it receives pointers to x and y. */
    swap(&x, &y);

    /* a is never passed to swap(). */
    printf("a = %d\n", a);
}
通过上述的两种保存寄存器的方式, 人为规定了一些寄存器在调用 callee 函数之前由 caller 保存(caller-save), 而另一些寄存器在 callee 之中由 callee 自己保存(callee-save)
r0-r3 are the argument and scratch registers; r0-r1 are also the result registers
r4-r8 are callee-save registers
r9 might be a callee-save register or not (on some variants of AAPCS it is a special register)
r10-r11 are callee-save registers
r12-r15 are special registers
现在再回头理解 softfp 和 hard 配置的区别: softfp 会继续沿用之前软件浮点的 Calling Convention 规则, 而 hard 会通过浮点寄存器进行数据的传递和保存, 这一点通过反汇编也可以观察到.