Page 2 of 2

Re: Arm Linux系统调用流程详细解析

Posted: 2025-03-06T03:15:26+00:00
by 擎天殿
最后再啰嗦一点,如果用sys_open来搜的话,是搜不到系统调用open的定义的,因为系统调用函数都是用宏来定义的。比如open,在fs/open.c文件中是这样定义的:

Code: Select all


/*
 * open system call: forwards to do_sys_open() with AT_FDCWD as the
 * directory argument and hands back whatever do_sys_open() returns.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
    long result;

    /* Add O_LARGEFILE to the flags when force_o_largefile() asks for it. */
    if (force_o_largefile())
        flags = flags | O_LARGEFILE;

    result = do_sys_open(AT_FDCWD, filename, flags, mode);

    /* avoid REGPARM breakage on x86: */
    asmlinkage_protect(3, result, filename, flags, mode);

    return result;
}

继续回到vector_swi,而如果系统调用号不正确,则会调用arm_syscall函数来进行处理,这个函数在文件arch/arm/kernel/traps.c中定义:

Code: Select all


/*
 * Handle all unrecognised system calls.
 *  0x9f0000 - 0x9fffff are some more esoteric system calls
 */
/* NR(x): offset of the call number __ARM_NR_<x> from __ARM_NR_BASE. */
#define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE)
/*
 * arm_syscall - dispatch a call number that belongs to the __ARM_NR_BASE
 * range, or report it as a bad system call.
 * @no:   the system call number.
 * @regs: saved register state of the caller; several cases read arguments
 *        from it or modify it (pc, cpsr).
 *
 * Returns the value computed by the selected case (often 0 or the saved
 * r0), -ENOSYS for unimplemented calls whose low 16 bits are <= 0x7ff, the
 * result of bad_syscall() when @no is outside the __ARM_NR_BASE range, or
 * 0 after reporting SIGILL through arm_notify_die().
 */
asmlinkage int arm_syscall(int no, struct pt_regs *regs)
{
    struct thread_info *thread = current_thread_info();
    siginfo_t info;

    /* Upper bits must match those of __ARM_NR_BASE, else it is not ours. */
    if ((no >> 16) != (__ARM_NR_BASE>> 16))
        return bad_syscall(no, regs);

    /* Dispatch on the low 16 bits of the call number. */
    switch (no & 0xffff) {
    case 0: /* branch through 0 */
        /* Report SIGSEGV / SEGV_MAPERR with a NULL fault address. */
        info.si_signo = SIGSEGV;
        info.si_errno = 0;
        info.si_code  = SEGV_MAPERR;
        info.si_addr  = NULL;

        arm_notify_die("branch through zero", regs, &info, 0, 0);
        return 0;

    case NR(breakpoint): /* SWI BREAK_POINT */
        /* Step the saved pc back by one instruction: 2 bytes in Thumb mode, else 4. */
        regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
        ptrace_break(current, regs);
        return regs->ARM_r0;

    /*
     * Flush a region from virtual address 'r0' to virtual address 'r1'
     * _exclusive_.  There is no alignment requirement on either address;
     * user space does not need to know the hardware cache layout.
     *
     * r2 contains flags.  It should ALWAYS be passed as ZERO until it
     * is defined to be something else.  For now we ignore it, but may
     * the fires of hell burn in your belly if you break this rule. ;)
     *
     * (at a later date, we may want to allow this call to not flush
     * various aspects of the cache.  Passing '0' will guarantee that
     * everything necessary gets flushed to maintain consistency in
     * the specified region).
     */
    case NR(cacheflush):
        do_cache_op(regs->ARM_r0, regs->ARM_r1, regs->ARM_r2);
        return 0;

    /*
     * usr26 / usr32: clear or set MODE32_BIT in the saved cpsr.  Both are
     * honoured only when elf_hwcap has HWCAP_26BIT; otherwise they break
     * out of the switch and end up on the SIGILL path below.
     */
    case NR(usr26):
        if (!(elf_hwcap & HWCAP_26BIT))
            break;
        regs->ARM_cpsr &= ~MODE32_BIT;
        return regs->ARM_r0;

    case NR(usr32):
        if (!(elf_hwcap & HWCAP_26BIT))
            break;
        regs->ARM_cpsr |= MODE32_BIT;
        return regs->ARM_r0;

    case NR(set_tls):
        /* Always record the new value (taken from r0) in thread_info. */
        thread->tp_value = regs->ARM_r0;
        /* Nothing more to do here when tls_emu is set. */
        if (tls_emu)
            return 0;
        if (has_tls_reg) {
            /* Write r0 to coprocessor 15 (c13, c0, 3). */
            asm ("mcr p15, 0, %0, c13, c0, 3"
                : : "r" (regs->ARM_r0));
        } else {
            /*
             * User space must never try to access this directly.
             * Expect your app to break eventually if you do so.
             * The user helper at 0xffff0fe0 must be used instead.
             * (see entry-armv.S for details)
             */
            *((unsigned int *)0xffff0ff0) = regs->ARM_r0;
        }
        return 0;

#ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG
    /*
     * Atomically store r1 in *r2 if *r2 is equal to r0 for user space.
     * Return zero in r0 if *MEM was changed or non-zero if no exchange
     * happened.  Also set the user C flag accordingly.
     * If access permissions have to be fixed up then non-zero is
     * returned and the operation has to be re-attempted.
     *
     * *NOTE*: This is a ghost syscall private to the kernel.  Only the
     * __kuser_cmpxchg code in entry-armv.S should be aware of its
     * existence.  Don't ever use this from user code.
     */
    case NR(cmpxchg):
    /* Loops: after a simulated fault at bad_access the walk is retried. */
    for (;;) {
        extern void do_DataAbort(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs);
        unsigned long val;
        unsigned long addr = regs->ARM_r2;
        struct mm_struct *mm = current->mm;
        pgd_t *pgd; pmd_t *pmd; pte_t *pte;
        spinlock_t *ptl;

        /* Start with the C flag cleared; it is set only on a successful store. */
        regs->ARM_cpsr &= ~PSR_C_BIT;
        down_read(&mm->mmap_sem);
        /* Walk pgd -> pmd -> pte for addr; any missing level is a bad access. */
        pgd = pgd_offset(mm, addr);
        if (!pgd_present(*pgd))
            goto bad_access;
        pmd = pmd_offset(pgd, addr);
        if (!pmd_present(*pmd))
            goto bad_access;
        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        /* The pte must be present and dirty before the direct access below. */
        if (!pte_present(*pte) || !pte_dirty(*pte)) {
            pte_unmap_unlock(pte, ptl);
            goto bad_access;
        }
        /* val becomes 0 exactly when *addr equals r0. */
        val = *(unsigned long *)addr;
        val -= regs->ARM_r0;
        if (val == 0) {
            *(unsigned long *)addr = regs->ARM_r1;
            regs->ARM_cpsr |= PSR_C_BIT;
        }
        pte_unmap_unlock(pte, ptl);
        up_read(&mm->mmap_sem);
        return val;

        bad_access:
        up_read(&mm->mmap_sem);
        /* simulate a write access fault */
        do_DataAbort(addr, 15 + (1 << 11), regs);
    }
#endif

    default:
        /* Calls 9f00xx..9f07ff are defined to return -ENOSYS
           if not implemented, rather than raising SIGILL.  This
           way the calling program can gracefully determine whether
           a feature is supported.  */
        if ((no & 0xffff) <= 0x7ff)
            return -ENOSYS;
        break;
    }
#ifdef CONFIG_DEBUG_USER
    /*
     * experience shows that these seem to indicate that
     * something catastrophic has happened
     */
    if (user_debug & UDBG_SYSCALL) {
        printk("[%d] %s: arm syscall %d\n",
               task_pid_nr(current), current->comm, no);
        dump_instr("", regs);
        if (user_mode(regs)) {
            __show_regs(regs);
            c_backtrace(regs->ARM_fp, processor_mode(regs));
        }
    }
#endif
    /*
     * Reached by every case that used 'break': report SIGILL / ILL_ILLTRP,
     * with the address set to the instruction pointer minus one instruction
     * (2 bytes in Thumb mode, else 4).
     */
    info.si_signo = SIGILL;
    info.si_errno = 0;
    info.si_code  = ILL_ILLTRP;
    info.si_addr  = (void __user *)instruction_pointer(regs) -
             (thumb_mode(regs) ? 2 : 4);

    arm_notify_die("Oops - bad syscall(2)", regs, &info, no, 0);
    return 0;
}


Re: Arm Linux系统调用流程详细解析

Posted: 2025-03-06T03:16:05+00:00
by 擎天殿
还有那个sys_ni_syscall,这个函数在kernel/sys_ni.c中定义,它的作用似乎也仅仅是要给用户空间返回错误码ENOSYS。

Code: Select all


/*
 * <linux/syscalls.h> cannot be included from this file, so the prototype
 * is repeated here to keep gcc quiet under -Wmissing-prototypes.
 */
asmlinkage long sys_ni_syscall(void);

/*
 * Target for system calls that are not implemented: it always
 * reports -ENOSYS.
 */
asmlinkage long sys_ni_syscall(void)
{
    const long not_implemented = -ENOSYS;

    return not_implemented;
}

系统调用号正确也好不正确也好,最终都是通过ret_fast_syscall例程来返回,同样在arch/arm/kernel/entry-common.S文件中:

Code: Select all


    .align    5
/*
 * This is the fast syscall return path.  We do as little as
 * possible here, and this includes saving r0 back into the SVC
 * stack.
 *
 * Flow: disable interrupts, load the flags word found at offset TI_FLAGS
 * from tsk, and branch to fast_work_pending if any bit of _TIF_WORK_MASK
 * is set.  Otherwise fall through to arch_ret_to_user and
 * restore_user_regs.
 */
ret_fast_syscall:
 UNWIND(.fnstart    )
 UNWIND(.cantunwind    )
    disable_irq                @ disable interrupts
    @ r1 = flags word at [tsk + TI_FLAGS]
    ldr    r1, [tsk, #TI_FLAGS]
    @ any bit of _TIF_WORK_MASK set -> take the fast_work_pending path
    tst    r1, #_TIF_WORK_MASK
    bne    fast_work_pending
#if defined(CONFIG_IRQSOFF_TRACER)
    asm_trace_hardirqs_on
#endif

    /* perform architecture specific actions before user return */
    arch_ret_to_user r1, lr

    @ restore registers using the fast variant, with stack offset S_OFF
    restore_user_regs fast = 1, offset = S_OFF
 UNWIND(.fnend        )


Re: Arm Linux系统调用流程详细解析

Posted: 2025-03-06T03:17:38+00:00
by 擎天殿
四.声明系统调用的相关宏

linux下的系统调用函数定义接口:

1.SYSCALL_DEFINE1~6(include/linux/syscalls.h )

Code: Select all


/*
 * SYSCALL_DEFINE<n>(name, ...): each variant forwards to SYSCALL_DEFINEx
 * with its argument count <n>, the name prefixed with an underscore
 * (_##name), and the remaining arguments unchanged.
 */
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)

2.SYSCALL_DEFINEx

Code: Select all


/*
 * SYSCALL_DEFINEx(x, sname, ...): x is the argument count, sname the
 * underscore-prefixed syscall name.
 *
 * With CONFIG_FTRACE_SYSCALLS it first emits two static string arrays,
 * types_<sname> and args_<sname> (built by __SC_STR_TDECL<x> and
 * __SC_STR_ADECL<x>), plus SYSCALL_METADATA(sname, x), and then expands
 * __SYSCALL_DEFINEx.  Without it, it expands to __SYSCALL_DEFINEx only.
 */
#ifdef CONFIG_FTRACE_SYSCALLS
#define SYSCALL_DEFINEx(x, sname, ...)                \
    static const char *types_##sname[] = {            \
        __SC_STR_TDECL##x(__VA_ARGS__)            \
    };                            \
    static const char *args_##sname[] = {            \
        __SC_STR_ADECL##x(__VA_ARGS__)            \
    };                            \
    SYSCALL_METADATA(sname, x);                \
    __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
#else
#define SYSCALL_DEFINEx(x, sname, ...)                \
    __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
#endif

3.__SYSCALL_DEFINEx

Code: Select all


#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS

/* Expands to the start of a static inline function named SYSC_<name>. */
#define SYSCALL_DEFINE(name) static inline long SYSC_##name

/*
 * Wrapper variant.  In order, the expansion:
 *  - declares sys<name> with the declared parameter types (__SC_DECL<x>);
 *  - declares the static inline SYSC<name> with the same parameters;
 *  - defines SyS<name>, which takes every argument as long (__SC_LONG<x>),
 *    runs __SC_TEST<x>, and calls SYSC<name> with each argument cast back
 *    to its declared type (__SC_CAST<x>);
 *  - aliases sys<name> to SyS<name> via SYSCALL_ALIAS;
 *  - ends with the SYSC<name> function header, so the braces following the
 *    macro invocation become the body of SYSC<name>.
 */
#define __SYSCALL_DEFINEx(x, name, ...)                    \
    asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__));        \
    static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__));    \
    asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__))        \
    {                                \
        __SC_TEST##x(__VA_ARGS__);                \
        return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__));    \
    }                                \
    SYSCALL_ALIAS(sys##name, SyS##name);                \
    static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__))

#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */

/* Expands to the start of an asmlinkage function named sys_<name>. */
#define SYSCALL_DEFINE(name) asmlinkage long sys_##name
/*
 * No-wrapper variant: expands directly to the sys<name> function header,
 * so the braces following the macro invocation become its body.
 */
#define __SYSCALL_DEFINEx(x, name, ...)                    \
    asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */

4.__SC_开头的宏

Code: Select all


#define __SC_DECL1(t1, a1)    t1 a1
#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)
#define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__)
#define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__)
#define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__)

#define __SC_LONG1(t1, a1)     long a1
#define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__)
#define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__)
#define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__)
#define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__)
#define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__)

#define __SC_CAST1(t1, a1)    (t1) a1
#define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__)
#define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__)
#define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__)
#define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__)
#define __SC_CAST6(t6, a6, ...) (t6) a6, __SC_CAST5(__VA_ARGS__)
...

5.针对SYSCALL_DEFINE1(close, unsigned int, fd)来分析一下

SYSCALL_DEFINE1(close, unsigned int, fd)根据#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)

化简SYSCALL_DEFINEx(1, _close, __VA_ARGS__) 【 ##是连接符的意思】,根据SYSCALL_DEFINEx的定义

化简__SYSCALL_DEFINEx(1, _close, __VA_ARGS__) 根据__SYSCALL_DEFINEx的定义

Code: Select all


/*
 * Illustration only (not compilable C): __SYSCALL_DEFINEx with x = 1 and
 * name = _close substituted, before the __SC_* helpers are expanded.
 */
#define __SYSCALL_DEFINEx(1, _close, ...)                \
    asmlinkage long sys_close(__SC_DECL1(__VA_ARGS__));        \
    static inline long SYSC_close(__SC_DECL1(__VA_ARGS__));    \
    asmlinkage long SyS_close(__SC_LONG1(__VA_ARGS__))        \
    {                            \
        __SC_TEST1(__VA_ARGS__);                \
        return (long) SYSC_close(__SC_CAST1(__VA_ARGS__));    \
    }                            \
    SYSCALL_ALIAS(sys_close, SyS_close);                \
    static inline long SYSC_close(__SC_DECL1(__VA_ARGS__))

这里的__VA_ARGS__代表可变参数宏中“...”所对应的实际参数,在本例中可以认为等于unsigned int, fd

Re: Arm Linux系统调用流程详细解析

Posted: 2025-03-06T03:18:46+00:00
by 擎天殿
根据__SC_宏化简

Code: Select all


/*
 * Illustration only (not compilable C): the expansion of
 * SYSCALL_DEFINE1(close, unsigned int, fd) after the __SC_* helpers have
 * been substituted.  SyS_close receives fd as long, checks at build time
 * that unsigned int is not larger than long, and calls SYSC_close with fd
 * cast back to unsigned int.  The final line is the SYSC_close function
 * header, which the braces following the macro invocation complete.
 */
#define __SYSCALL_DEFINEx(1, _close, ...)                \
    asmlinkage long sys_close(unsigned int fd);            \
    static inline long SYSC_close(unsigned int fd);        \
    asmlinkage long SyS_close(long fd)                \
    {                            \
        BUILD_BUG_ON(sizeof(unsigned int) > sizeof(long));    \
        return (long) SYSC_close((unsigned int)fd);        \
    }                            \
    SYSCALL_ALIAS(sys_close, SyS_close);                \
    static inline long SYSC_close(unsigned int fd)

声明了sys_close函数

定义了SyS_close函数,函数体调用SYSC_close函数,并返回其返回值

SYSCALL_ALIAS宏

Code: Select all

/*
 * SYSCALL_ALIAS(alias, name): emit the assembler directives
 * ".globl <alias>" and ".set <alias>, <name>", making <alias> a global
 * symbol set to <name>.
 */
#define SYSCALL_ALIAS(alias, name)                    \
    asm ("\t.globl " #alias "\n\t.set " #alias ", " #name)
插入汇编代码,让执行sys_close等同于执行SyS_close

Code: Select all

/*
 * #alias and #name stringify the macro arguments; the adjacent string
 * literals are then concatenated into a single asm string.
 */
#define SYSCALL_ALIAS(alias, name)                    \
    asm ("\t.globl " #alias "\n\t.set " #alias ", " #name)
【#是预处理器的字符串化运算符,把宏参数转换为字符串字面量】

BUILD_BUG_ON宏用于编译期检查:当括号内的条件为真时会使编译失败,这里用来保证参数类型的大小不超过long

最后一句是SYSC_close的函数定义

所以在SYSCALL_DEFINE1宏定义后面紧跟的是{}包围起来的函数体

6.根据5的解析可推断出

SYSCALL_DEFINE1的'1'代表的是sys_close的参数个数为1

同理SYSCALL_DEFINE?的'?'代表的是sys_name的参数为'?'个

7.系统调用函数的定义用SYSCALL_DEFINE宏修饰

系统调用函数的外部声明在include/linux/syscalls.h头文件中

Re: Arm Linux系统调用流程详细解析

Posted: 2025-03-06T03:20:09+00:00
by 擎天殿
五.添加新的系统调用

第一、打开arch/arm/kernel/calls.S,在最后添加系统调用的函数原型的指针,例如:

Code: Select all

/* Entry for the new system call; what it expands to depends on how CALL is defined by the including file. */
CALL(sys_set_senda)
补充说明一点关于NR_syscalls的东西,这个常量表示系统调用的总的个数,在较新版本的内核中,文件arch/arm/kernel/entry-common.S中可以找到:

Code: Select all

   /* Start the counter at zero. */
   .equ NR_syscalls,0
/* First pass: every CALL(x) entry only increments NR_syscalls by one. */
#define CALL(x) .equ NR_syscalls,NR_syscalls+1
#include "calls.S"
/* Redefine CALL so that later uses emit the entry itself as a .long word. */
#undef CALL
#define CALL(x) .long x
相当的巧妙,不是吗?在系统调用表中每添加一个系统调用,NR_syscalls就自动增加一。在这个地方先求出NR_syscalls,然后重新定义CALL(x)宏,这样也可以不影响文件后面系统调用表的建立。

第二、打开include/asm-arm/unistd.h,添加系统调用号的宏。感觉这步可以省略,因为这个地方定义的系统调用号主要是给C库(比如uClibc、Glibc)用的。例如:

Code: Select all

    /*
     * Call number for the new system call: __NR_SYSCALL_BASE + 365.
     * NOTE(review): the name carries a "plan_" prefix that the
     * sys_set_senda implementation does not; confirm which is intended.
     */
    #define __NR_plan_set_senda             (__NR_SYSCALL_BASE+365)
为了向后兼容,系统调用只能增加而不能减少,这里的编号添加时,也必须按顺序来。否则会导致核心运行错误。

第三、实例化该系统调用,即编写新添加的系统调用的实现,例如:

Code: Select all


/*
 * set_senda system call: passes a command to UART_PUT_CR for at91_port[2].
 * A non-zero iset selects AT91C_US_SENDA, zero selects AT91C_US_RSTSTA.
 * Always returns 0.
 */
SYSCALL_DEFINE1(set_senda, int,iset)
{
       UART_PUT_CR(&at91_port[2],
                   (iset ? AT91C_US_SENDA : AT91C_US_RSTSTA));

       return 0;
}

第四、打开include/linux/syscalls.h添加函数声明

Code: Select all

/* Prototype of the new system call defined with SYSCALL_DEFINE1(set_senda, int, iset). */
asmlinkage long sys_set_senda(int iset);
第五、在应用程序中调用该系统调用,可以参考uClibc的实现。