在Linux下系统调用是用软中断实现的,下面以一个简单的open例子简要分析一下应用层的open是如何调用到内核中的sys_open的。
t8.c
Code: Select all
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>	/* close() -- was missing, leaving close() implicitly declared */

/*
 * t8.c - minimal program used to trace how a libc call reaches the kernel.
 *
 * It issues one open() and one close() so that the statically linked
 * binary contains the libc wrappers for both system calls.
 *
 * The result of open() is not checked, which keeps the generated code
 * short. Note that opening a directory with O_RDWR fails with EISDIR,
 * so fd is -1 here and close() then fails with EBADF; the program only
 * exists to exercise the system-call path.
 *
 * Returns 0 unconditionally.
 */
int main(int argc, const char *argv[])
{
	int fd;

	fd = open(".", O_RDWR);
	close(fd);
	return 0;
}
将t8.c进行静态编译,然后反汇编,看一下是如何调用open的?
Code: Select all
arm-linux-gcc t8.c --static
arm-linux-objdump -D a.out >a.dis
下面我们截取a.dis中的一部分进行说明:
Code: Select all
......
00008228 <main>:
8228: e92d4800 push {fp, lr}
822c: e28db004 add fp, sp, #4 ; 0x4
8230: e24dd010 sub sp, sp, #16 ; 0x10
8234: e50b0010 str r0, [fp, #-16]
8238: e50b1014 str r1, [fp, #-20]
823c: e59f0028 ldr r0, [pc, #40] ; 826c <main+0x44>
8240: e3a01002 mov r1, #2 ; 0x2 ; #define O_RDWR 00000002
8244: eb002e7d bl 13c40 <__libc_open>
8248: e1a03000 mov r3, r0
824c: e50b3008 str r3, [fp, #-8]
8250: e51b0008 ldr r0, [fp, #-8]
8254: eb002e9d bl 13cd0 <__libc_close>
8258: e3a03000 mov r3, #0 ; 0x0
825c: e1a00003 mov r0, r3
8260: e24bd004 sub sp, fp, #4 ; 0x4
8264: e8bd4800 pop {fp, lr}
8268: e12fff1e bx lr
826c: 00064b8c .word 0x00064b8c
......
00013c40 <__libc_open>:
13c40: e51fc028 ldr ip, [pc, #-40] ; 13c20 <___fxstat64+0x50>
13c44: e79fc00c ldr ip, [pc, ip]
13c48: e33c0000 teq ip, #0 ; 0x0
13c4c: 1a000006 bne 13c6c <__libc_open+0x2c>
13c50: e1a0c007 mov ip, r7
13c54: e3a07005 mov r7, #5 ; 0x5
#在arch/arm/include/asm/unistd.h中:#define __NR_open (__NR_SYSCALL_BASE+5)
其中,在EABI下__NR_SYSCALL_BASE是0(OABI下它等于__NR_OABI_SYSCALL_BASE,即0x900000),所以这里放入r7的系统调用号就是5
13c58: ef000000 svc 0x00000000 #产生软中断
13c5c: e1a0700c mov r7, ip
13c60: e3700a01 cmn r0, #4096 ; 0x1000
13c64: 312fff1e bxcc lr
13c68: ea0008d4 b 15fc0 <__syscall_error>
......
也许会好奇,ARM软中断不是用SWI吗,这里怎么变成了SVC了,请看下面一段话,是从ARM官网copy的:
SVC
超级用户调用。
语法
SVC{cond} #immed
其中:
cond
是一个可选的条件代码(请参阅条件执行)。
immed
是一个表达式,其取值为以下范围内的一个整数:
在 ARM 指令中为 0 到 $2^{24}-1$(24 位值)
在 16 位 Thumb 指令中为 0-255(8 位值)。
用法
SVC 指令会引发一个异常。 这意味着处理器模式会更改为超级用户模式,CPSR 会保存到超级用户模式 SPSR,并且执行会跳转到 SVC 向量(请参阅《开发指南》中的第 6 章 处理处理器异常)。
处理器会忽略 immed。 但异常处理程序会获取它,借以确定所请求的服务。
Note
作为 ARM 汇编语言开发成果的一部分,SWI 指令已重命名为 SVC。 在此版本的 RVCT 中,SWI 指令反汇编为 SVC,并提供注释以指明这是以前的 SWI。
条件标记
此指令不更改标记。
体系结构
此 ARM 指令可用于所有版本的 ARM 体系结构。
在基于ARM的Linux中,异常向量表已经被放置在了0xFFFF0000这个位置。这个过程的完成:
start_kernel ---> setup_arch ---> early_trap_init
Code: Select all
/*
 * early_trap_init - populate the exception vector page.
 *
 * Copies three regions into the page starting at CONFIG_VECTORS_BASE:
 *   - the vector table   (__vectors_start..__vectors_end) at offset 0
 *   - the exception stubs (__stubs_start..__stubs_end)    at offset 0x200
 *   - the kuser helpers  (__kuser_helper_start..__kuser_helper_end),
 *     placed so that they end exactly at offset 0x1000
 * then copies the signal-return code to KERN_SIGRETURN_CODE and flushes
 * the instruction cache over the whole page.
 *
 * Takes no arguments and returns nothing.
 */
void __init early_trap_init(void)
{
unsigned long vectors = CONFIG_VECTORS_BASE; // base of the vector page, i.e. 0xFFFF0000
extern char __stubs_start[], __stubs_end[];
extern char __vectors_start[], __vectors_end[];
extern char __kuser_helper_start[], __kuser_helper_end[];
int kuser_sz = __kuser_helper_end - __kuser_helper_start; // size in bytes of the kuser helper region
/*
* Copy the vectors, stubs and kuser helpers (in entry-armv.S)
* into the vector page, mapped at 0xffff0000, and ensure these
* are visible to the instruction stream.
*/
memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
// The stubs land 0x200 bytes into the page; the vector table's branch
// targets are adjusted for this with stubs_offset in entry-armv.S.
memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
// The kuser helpers are aligned to the end of the 0x1000-byte page.
memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
/*
* Copy signal return handlers into the vector page, and
* set sigreturn to be a pointer to these.
*/
memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
sizeof(sigreturn_codes));
flush_icache_range(vectors, vectors + PAGE_SIZE);
modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
}
http://www.cnblogs.com/pengdonglin137/p/3603549.html
要把异常中断向量表的位置设置为0xffff0000,需要修改协处理器CP15的寄存器C1的第13位,将其设置为1。下面以TQ2440提供的2.6.30.4内核为例看一下:
Code: Select all
arch/arm/kernel/head.S
/*
 * Hand-off to the CPU-specific init function.
 * lr is preloaded with the address of __enable_mmu, so an init function
 * that ends with "mov pc, lr" continues at __enable_mmu.
 */
adr lr, __enable_mmu @ return (PIC) address
add pc, r10, #PROCINFO_INITFUNC @ jump to r10 + PROCINFO_INITFUNC (NOTE(review): r10 presumably points at the matching proc_info record -- not shown in this excerpt)
Code: Select all
/*
 * __arm920_proc_info: processor-info record for the ARM920.
 *
 * The record is a sequence of words plus one branch instruction
 * ("b __arm920_setup"). NOTE(review): the first two words look like a
 * CPU ID value and its match mask, and the two PMD_* words like section
 * mapping flags -- confirm against struct proc_info_list. The branch is
 * presumably the slot reached by "add pc, r10, #PROCINFO_INITFUNC" in
 * head.S; confirm the offset.
 */
__arm920_proc_info:
.long 0x41009200
.long 0xff00fff0
.long PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
.long PMD_TYPE_SECT | \
PMD_BIT4 | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b __arm920_setup
.long cpu_arch_name
.long cpu_elf_name
......
.size __arm920_proc_info, . - __arm920_proc_info
Code: Select all
/*
 * __arm920_setup: ARM920 early CPU setup.
 *
 * Invalidates the caches and (with CONFIG_MMU) the TLBs, drains the
 * write buffer, then computes a new control-register value in r0:
 *     r0 = (current CP15 c1 value & ~clear) | set
 * where the clear/set words are loaded from arm920_crval.
 * The value is only left in r0; this routine does not write it back
 * to CP15. Returns to the address in lr.
 */
.type __arm920_setup, #function
__arm920_setup:
mov r0, #0
mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4
#ifdef CONFIG_MMU
mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4
#endif
adr r5, arm920_crval
ldmia r5, {r5, r6} @ load the two words at arm920_crval (see its definition below): with CONFIG_MMU, r5 = 0x3f3f (clear) and r6 = 0x3135 (mmuset)
mrc p15, 0, r0, c1, c0 @ get control register v4 (read CP15 register c1 into r0)
bic r0, r0, r5 @ drop every bit named in the clear mask
orr r0, r0, r6 @ only bit 13 matters for this discussion: it is now set to 1 in r0
mov pc, lr
.size __arm920_setup, . - __arm920_setup
/*
* R
* .RVI ZFRS BLDP WCAM
* ..11 0001 ..11 0101
*
*/
.type arm920_crval, #object
arm920_crval:
crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130
Code: Select all
/*
 * crval: emit a two-word control-register descriptor.
 *
 *   clear  - mask word, always emitted first
 *   mmuset - second word when CONFIG_MMU is defined
 *   ucset  - second word when CONFIG_MMU is not defined
 */
.macro crval, clear, mmuset, ucset
#ifdef CONFIG_MMU
.word \clear
.word \mmuset
#else
.word \clear
.word \ucset
#endif
.endm
在__arm920_setup中执行完 mov pc, lr后,便跳入了下面的语句:
Code: Select all
/*
 * __enable_mmu: final adjustments before the MMU is switched on.
 *
 * In:  r0 = control-register value prepared by the CPU setup routine
 *      r4 = page table pointer (written to CP15 c2 below)
 *
 * Sets or clears the CR_A, CR_C, CR_Z and CR_I bits in r0 according to
 * the kernel configuration, loads the domain access register (c3) and
 * the page table pointer (c2), then branches to __turn_mmu_on.
 * Clobbers r5.
 */
__enable_mmu:
#ifdef CONFIG_ALIGNMENT_TRAP
orr r0, r0, #CR_A
#else
bic r0, r0, #CR_A
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CR_C
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
bic r0, r0, #CR_Z
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #CR_I
#endif
mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT))
mcr p15, 0, r5, c3, c0, 0 @ load domain access register
mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
b __turn_mmu_on
ENDPROC(__enable_mmu)
看一下__turn_mmu_on的实现(head.S (arch\arm\kernel)):
Code: Select all
/*
 * __turn_mmu_on: write the prepared value in r0 to the CP15 control
 * register (c1), then continue at the address held in r13.
 *
 * This is the point where the bits assembled in r0 by the earlier setup
 * code actually take effect. Clobbers r3.
 * NOTE(review): the ID-register read and the "mov rN, rN" no-ops look
 * like padding around the control-register write -- confirm the exact
 * purpose against the kernel source comments.
 */
.align 5
__turn_mmu_on:
mov r0, r0
mcr p15, 0, r0, c1, c0, 0 @ write control reg
mrc p15, 0, r3, c0, c0, 0 @ read id reg
mov r3, r3
mov r3, r3
mov pc, r13
ENDPROC(__turn_mmu_on)
说完异常向量表的位置,接下来看看软中断的实现。
ARM提供的中断类型:

ARM的异常处理模型:

entry-armv.S (arch\arm\kernel)
Code: Select all
/*
 * Exception vector table and related data.
 *
 * .LCvswi holds the address of vector_swi and is the last word before
 * __stubs_end, so it is part of the stubs region.
 *
 * early_trap_init() copies the vector table to the start of the vector
 * page and the stubs region to offset 0x200 within it. stubs_offset is
 * the correction that makes the branches/loads below, once running from
 * the copied table, reach the copied stubs instead of the link-time
 * addresses.
 */
.LCvswi:
.word vector_swi
.globl __stubs_end
__stubs_end:
.equ stubs_offset, __vectors_start + 0x200 - __stubs_start
.globl __vectors_start
__vectors_start:
swi SYS_ERROR0
b vector_und + stubs_offset
ldr pc, .LCvswi + stubs_offset @ a software interrupt (SWI/SVC) lands here first; loads pc with the address of vector_swi
b vector_pabt + stubs_offset
b vector_dabt + stubs_offset
b vector_addrexcptn + stubs_offset
b vector_irq + stubs_offset
b vector_fiq + stubs_offset
.globl __vectors_end
__vectors_end:
.data
.globl cr_alignment
.globl cr_no_alignment
/* Two 4-byte slots reserved in .data; vector_swi reloads the control register from cr_alignment. */
cr_alignment:
.space 4
cr_no_alignment:
.space 4
Code: Select all
/*
 * vector_swi: SWI/SVC (system call) entry point.
 *
 * Saves the caller's registers in a frame of S_FRAME_SIZE bytes on the
 * stack, determines the system call number (scno) and the table to use
 * (tbl), then dispatches through the table with lr set to
 * ret_fast_syscall.
 *
 *   - EABI call (SWI immediate == 0): scno is left as the caller set it
 *     and tbl = sys_call_table.
 *   - Old-ABI call (SWI immediate != 0): scno = immediate ^
 *     __NR_OABI_SYSCALL_BASE and tbl = sys_oabi_call_table.
 *
 * Out-of-range numbers fall through to arm_syscall / sys_ni_syscall.
 *
 * NOTE(review): this listing appears to be the CONFIG_OABI_COMPAT
 * variant with the preprocessor conditionals removed -- compare with
 * arch/arm/kernel/entry-common.S before reusing it.
 */
ENTRY(vector_swi)
	sub	sp, sp, #S_FRAME_SIZE
	stmia	sp, {r0 - r12}			@ Calling r0 - r12
	add	r8, sp, #S_PC
	stmdb	r8, {sp, lr}^			@ Calling sp, lr
	mrs	r8, spsr			@ called from non-FIQ mode, so ok.
	str	lr, [sp, #S_PC]			@ Save calling PC
	str	r8, [sp, #S_PSR]		@ Save CPSR
	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
	zero_fp

	/*
	 * Get the system call number.
	 */

	/*
	 * If we have CONFIG_OABI_COMPAT then we need to look at the swi
	 * value to determine if it is an EABI or an old ABI call.
	 */
	ldr	r10, [lr, #-4]			@ get SWI instruction
	@ r10 now holds the machine code of the instruction that raised
	@ the software interrupt. On exception entry (ARM state) lr holds
	@ the address of the instruction following the SWI, so lr - 4 is
	@ the address of the SWI instruction itself.
  A710(	and	ip, r10, #0x0f000000		@ check for SWI		)
  A710(	teq	ip, #0x0f000000						)
  A710(	bne	.Larm710bug						)

	ldr	ip, __cr_alignment
	ldr	ip, [ip]
	mcr	p15, 0, ip, c1, c0		@ update control register
	enable_irq				@ re-enable IRQs: taking the exception masked them on this CPU

	get_thread_info tsk			@ tsk = current thread_info
	adr	tbl, sys_call_table		@ load syscall table pointer: tbl (r8) = start of sys_call_table
	ldr	ip, [tsk, #TI_FLAGS]		@ check for syscall tracing

	/*
	 * If the swi argument is zero, this is an EABI call and we do nothing.
	 *
	 * If this is an old ABI call, get the syscall number into scno and
	 * get the old ABI syscall table address.
	 */
	bics	r10, r10, #0xff000000
	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
	ldrne	tbl, =sys_oabi_call_table

	stmdb	sp!, {r4, r5}			@ push fifth and sixth args
	tst	ip, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
	bne	__sys_trace

	cmp	scno, #NR_syscalls		@ check upper syscall limit
	adr	lr, ret_fast_syscall		@ return address
	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine

	add	r1, sp, #S_OFF
2:	mov	why, #0				@ no longer a real syscall
	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
	bcs	arm_syscall
	b	sys_ni_syscall			@ not private func
ENDPROC(vector_swi)
上面的vector_swi位于entry-common.S (arch\arm\kernel)。下面是entry-header.S (arch\arm\kernel)的部分内容:
Code: Select all
/*
* These are the registers used in the syscall handler, and allow us to
* have in theory up to 7 arguments to a function - r0 to r6.
*
* r7 is reserved for the system call number for thumb mode.
*
* Note that tbl == why is intentional.
*
* We must set at least "tsk" and "why" when calling ret_with_reschedule.
*/
scno .req r7 @ syscall number
tbl .req r8 @ syscall table pointer
why .req r8 @ Linux syscall (!= 0)
tsk .req r9 @ current thread_info