INET Sockets Tour: Where are the roots of socket()?

Last updated 2002-01-31 10:43 am


socket(), a glibc-2.0 library function, executes a system call (software interrupt 0x80) with function number 102 and sub-function number 1. The system call then dispatches to sys_socket().

I can't find the definition of socket() in Linux kernel source listings!

 Do you know where the socket function is defined?
 It is not defined in the kernel source itself; it resides in glibc-2.0.
 Let's explore the source!

Test code

 To examine the origin of the socket function, I wrote a simple program.
 Do not forget to include the <sys/socket.h> header file.


#include <sys/socket.h>

/*
 * Minimal probe program: its only purpose is to reference socket() so that
 * the symbol shows up in the executable and can be inspected with nm/gdb.
 * The arguments 1, 2, 3 are arbitrary placeholders; the result is not used.
 */
int main(int argc, char** argv)
{
    int res = socket( 1, 2, 3 );    /* Call socket function */

    /* Silence unused-variable warnings; nothing here is inspected at run time. */
    (void)argc;
    (void)argv;
    (void)res;

    return 0;
}

 Then compile the source to generate an executable file.
 Dump the symbol table of a.out using the nm command.

 nm reveals that socket() is a library function from glibc-2.0.

The location of socket()

~$ gcc socktes.c
~$ nm a.out
080483d4 T main
         U socket@@GLIBC_2.0

 Next, compile the source with debugging (-g) and static link (-static) options.

 This version of a.out is huge (the former is 4.7K bytes and this one is 246K bytes!), but it includes the body of socket().

 Execute gdb command.

 Finally, you will find the actual body of the socket function.

It simply issues int 0x80 with function number 102 (decimal; 0x66 loaded into %eax) and sub-function number 1 (loaded into %ebx).

Compile, link statically and disassemble

~$ gcc -static -g socktes.c
~$ gdb a.out
GNU gdb 19990928
Copyright 1998 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "i686-pc-linux-gnu"...
(gdb) disassemble socket
Dump of assembler code for function socket:
0x804ca90 <socket>:     mov    %ebx,%edx
0x804ca92 <socket+2>:   mov    $0x66,%eax
0x804ca97 <socket+7>:   mov    $0x1,%ebx
0x804ca9c <socket+12>:  lea    0x4(%esp,1),%ecx
0x804caa0 <socket+16>:  int    $0x80
0x804caa2 <socket+18>:  mov    %edx,%ebx
0x804caa4 <socket+20>:  cmp    $0xffffff83,%eax
0x804caa7 <socket+23>:  jae    0x804ccd0 <__syscall_error>
0x804caad <socket+29>:  ret    
0x804caae <socket+30>:  nop    
0x804caaf <socket+31>:  nop    
End of assembler dump.

Into the kernel

 There is a system call table (sys_call_table) in the kernel.
 Entry 102 of the table is sys_socketcall.


        pushl %eax                      # save orig_eax
        cmpl $(NR_syscalls),%eax
        jae badsys
        testb $0x02,tsk_ptrace(%ebx)    # PT_TRACESYS
        jne tracesys
        call *SYMBOL_NAME(sys_call_table)(,%eax,4)
        movl %eax,EAX(%esp)             # save the return value

        .long SYMBOL_NAME(sys_fstatfs)          /* 100 */
        .long SYMBOL_NAME(sys_ioperm)
        .long SYMBOL_NAME(sys_socketcall)       /* 102 */

 Sub-function number 1 is mapped to sys_socket()


/*
 * Sub-function numbers for sys_socketcall(): the value passed as its `call`
 * argument selects which sys_*() socket routine is invoked.
 */
#define SYS_SOCKET      1               /* sys_socket(2)                */
#define SYS_BIND        2               /* sys_bind(2)                  */
#define SYS_CONNECT     3               /* sys_connect(2)               */
#define SYS_LISTEN      4               /* sys_listen(2)                */
#define SYS_ACCEPT      5               /* sys_accept(2)                */
#define SYS_GETSOCKNAME 6               /* sys_getsockname(2)           */
#define SYS_GETPEERNAME 7               /* sys_getpeername(2)           */
#define SYS_SOCKETPAIR  8               /* sys_socketpair(2)            */
#define SYS_SEND        9               /* sys_send(2)                  */
#define SYS_RECV        10              /* sys_recv(2)                  */
#define SYS_SENDTO      11              /* sys_sendto(2)                */
#define SYS_RECVFROM    12              /* sys_recvfrom(2)              */
#define SYS_SHUTDOWN    13              /* sys_shutdown(2)              */
#define SYS_SETSOCKOPT  14              /* sys_setsockopt(2)            */
#define SYS_GETSOCKOPT  15              /* sys_getsockopt(2)            */
#define SYS_SENDMSG     16              /* sys_sendmsg(2)               */
#define SYS_RECVMSG     17              /* sys_recvmsg(2)               */

 Here is the entrance to the sockets world!


 *      System call vectors. 
 *      Argument checking cleaned up. Saved 20% in size.
 *  This function doesn't need to set the kernel lock because
 *  it is set by the callees. 

asmlinkage long sys_socketcall(int call, unsigned long *args)
        unsigned long a[6];
        unsigned long a0,a1;
        int err;

                return -EINVAL;

        /* copy_from_user should be SMP safe. */
        if (copy_from_user(a, args, nargs[call]))
                return -EFAULT;
                case SYS_SOCKET:
                        err = sys_socket(a0,a1,a[2]);
                case SYS_BIND:
                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
                case SYS_CONNECT:
                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
                case SYS_LISTEN:
                        err = sys_listen(a0,a1);
                case SYS_ACCEPT:
                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
                case SYS_GETSOCKNAME:
                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
                case SYS_GETPEERNAME:
                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
                case SYS_SOCKETPAIR:
                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
                case SYS_SEND:
                        err = sys_send(a0, (void *)a1, a[2], a[3]);
                case SYS_SENDTO:
                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
                                         (struct sockaddr *)a[4], a[5]);
                case SYS_RECV:
                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
                case SYS_RECVFROM:
                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
                                           (struct sockaddr *)a[4], (int *)a[5]);
                case SYS_SHUTDOWN:
                        err = sys_shutdown(a0,a1);
                case SYS_SETSOCKOPT:
                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
                case SYS_GETSOCKOPT:
                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
                case SYS_SENDMSG:
                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
                case SYS_RECVMSG:
                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
                        err = -EINVAL;
        return err;

 Finally, we have reached sys_socket() from socket()!


asmlinkage long sys_socket(int family, int type, int protocol)
        int retval;
        struct socket *sock;

        retval = sock_create(family, type, protocol, &sock);
        if (retval < 0)
                goto out;

        retval = sock_map_fd(sock);
        if (retval < 0)
                goto out_release;

        /* It may be already another descriptor 8) Not kernel problem. */
        return retval;

        return retval;

/*
 * sock_create - create a socket of the given family/type/protocol.
 *
 * On success the new socket is stored in *res and the (non-negative) value
 * returned by the family's create() hook is returned.  On failure a negative
 * errno value is returned: -EINVAL for an out-of-range or unregistered
 * family, -ENFILE when sock_alloc() fails, or whatever negative value the
 * family's create() hook reported.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
        int i;
        struct socket *sock;

        /*
         *      Check that the family is in range
         */
        if(family<0 || family>=NPROTO)
                return -EINVAL;

        /* NOTE(review): the dots below presumably mark code omitted from this excerpt. */
        .....

        net_family_read_lock();
        if (net_families[family] == NULL) {
                i = -EINVAL;
                goto out;
        }

        /*
         *      Allocate the socket and allow the family to set things up. If
         *      the protocol is 0, the family is instructed to select an
         *      appropriate default.
         */
        if (!(sock = sock_alloc())) {
                printk(KERN_WARNING "socket: no more sockets\n");
                i = -ENFILE;            /* Not exactly a match, but it's the
                                           closest POSIX thing */
                goto out;
        }

        sock->type = type;

        /* Hand over to the family's create() hook; release the socket if it fails. */
        if ((i = net_families[family]->create(sock, protocol)) < 0) {
                sock_release(sock);
                goto out;
        }

        *res = sock;

out:
        /* Both the success path and the error paths after the lock end up here. */
        net_family_read_unlock();
        return i;
}

/**
 *      sock_alloc      -       allocate a socket
 *
 *      Allocate a new inode and socket object. The two are bound together
 *      and initialised. The socket is then returned. If we are out of inodes
 *      NULL is returned.
 */
struct socket *sock_alloc(void)
{
        struct inode * inode;
        struct socket * sock;

        inode = get_empty_inode();
        if (!inode)
                return NULL;

        /* The socket object is embedded in the inode (see socki_lookup()). */
        sock = socki_lookup(inode);

        inode->i_mode = S_IFSOCK|S_IRWXUGO;
        inode->i_sock = 1;
        inode->i_uid = current->fsuid;
        inode->i_gid = current->fsgid;

        /* Start from a clean, unconnected socket with no ops, sk or file attached. */
        sock->inode = inode;
        init_waitqueue_head(&sock->wait);
        sock->fasync_list = NULL;
        sock->state = SS_UNCONNECTED;
        sock->flags = 0;
        sock->ops = NULL;
        sock->sk = NULL;
        sock->file = NULL;

        /* Bump the sockets-in-use counter of the current processor. */
        sockets_in_use[smp_processor_id()].counter++;
        return sock;
}

/* Return the socket embedded in the given inode (its u.socket_i member). */
extern __inline__ struct socket *socki_lookup(struct inode *inode)
{
        return &inode->u.socket_i;
}

/*
 *      Obtains the first available file descriptor and sets it up for use.
 *
 *      This function creates a file structure and maps it to the fd space
 *      of the current process. On success it returns the file descriptor,
 *      and the file struct is implicitly stored in sock->file.
 *      Note that another thread may close the file descriptor before we
 *      return from this function. We use the fact that now we do not refer
 *      to the socket after mapping. If one day we need it, this
 *      function will increment the ref. count on the file by 1.
 *
 *      In any case the returned fd MAY BE not valid!
 *      This race condition is unavoidable
 *      with shared fd spaces; we cannot solve it inside the kernel,
 *      but we still take care of internal coherence.
 */
static int sock_map_fd(struct socket *sock)
{
        int fd;

        /*
         *      Find a file descriptor suitable for return to the user.
         */
        fd = get_unused_fd();
        if (fd >= 0) {
                struct file *file = get_empty_filp();

                if (!file) {
                        /* No file structure: give the descriptor back. */
                        put_unused_fd(fd);
                        fd = -ENFILE;
                        goto out;
                }

                file->f_dentry = d_alloc_root(sock->inode);
                /* MOUNT_REWRITE: set to sockfs internal vfsmnt */
                file->f_vfsmnt = NULL;
                if (!file->f_dentry) {
                        /* No dentry: undo both the file and the descriptor. */
                        put_filp(file);
                        put_unused_fd(fd);
                        fd = -ENOMEM;
                        goto out;
                }

                sock->file = file;
                file->f_op = &socket_file_ops;
                file->f_mode = 3;
                file->f_flags = O_RDWR;
                file->f_pos = 0;
                fd_install(fd, file);
        }

out:
        /* fd is either the installed descriptor or a negative errno value. */
        return fd;
}

/*
 *      Socket files have a set of 'special' operations as well as the generic
 *      file ones. These don't appear in the operation structures but are done
 *      directly via the socketcall() multiplexor.
 */
static struct file_operations socket_file_ops = {
        llseek:         sock_lseek,
        read:           sock_read,
        write:          sock_write,
        poll:           sock_poll,
        ioctl:          sock_ioctl,
        mmap:           sock_mmap,
        open:           sock_no_open,   /* special open code to disallow open via /proc */
        release:        sock_close,
        fasync:         sock_fasync,
        readv:          sock_readv,
        writev:         sock_writev
};

To be continued.