博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Linux kernel 4.20 socket源码分析
阅读量:7229 次
发布时间:2019-06-29

本文共 7745 字,大约阅读时间需要 25 分钟。

  hot3.png

结论

基于Unix“一切皆文件”的思想,内核创建了名为sockfs的伪文件系统(挂载点为socket:,由于不是/,所以无法直接看到,但可以通过ls -l /proc/$pid/fd/看到),并实现了通用文件系统的操作接口。这些接口中包含socket对象的创建接口:socket对象在创建后会与file对象关联,并返回相应的fd,从而使得通用的、与文件相关的syscall(read/write)也可以用于socket。

核心数据结构

/**
 *  struct socket - general BSD socket
 *  @state: socket state (%SS_CONNECTED, etc)
 *  @type: socket type (%SOCK_STREAM, etc)
 *  @flags: socket flags (%SOCK_NOSPACE, etc)
 *  @ops: protocol specific socket operations
 *  @file: File back pointer for gc
 *  @sk: internal networking protocol agnostic socket representation
 *  @wq: wait queue for several uses
 *
 *  How the excerpts in this article use the fields:
 *  - @type is filled in by __sock_create() from the caller's type argument.
 *  - @file is set by sock_alloc_file(), which also stores the socket in
 *    file->private_data, giving a two-way link between file and socket.
 *  - @ops is what sock_recvmsg_nosec() dispatches through (ops->recvmsg).
 */
struct socket {
        socket_state            state;

        short                   type;

        unsigned long           flags;

        struct socket_wq        *wq;

        struct file             *file;
        struct sock             *sk;
        const struct proto_ops  *ops;
};

初始化

core_initcall(sock_init);	/* early initcall */

/*
 * Initialisation entry point (excerpt; "..." marks code elided by the
 * article). The visible part registers the sockfs filesystem type and then
 * mounts it with kern_mount(), keeping the resulting mount in sock_mnt.
 * A registration failure jumps to the out_fs label, which is not shown here.
 */
static int __init sock_init(void)
{
	...
	err = register_filesystem(&sock_fs_type);
	if (err)
		goto out_fs;
	sock_mnt = kern_mount(&sock_fs_type);
	...
}

/*
 * .mount callback of sock_fs_type. It ignores flags, dev_name and data and
 * simply forwards to mount_pseudo_xattr(), passing the "socket:" name, the
 * sockfs super operations, the xattr handlers, the dentry operations and
 * SOCKFS_MAGIC.
 */
static struct dentry *sockfs_mount(struct file_system_type *fs_type,
			 int flags, const char *dev_name, void *data)
{
	return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
				  sockfs_xattr_handlers,
				  &sockfs_dentry_operations, SOCKFS_MAGIC);
}

/* Mount of sockfs; assigned in sock_init() and used by sock_alloc(). */
static struct vfsmount *sock_mnt __read_mostly;

/* Filesystem type descriptor registered by sock_init(). */
static struct file_system_type sock_fs_type = {
	.name =		"sockfs",
	.mount =	sockfs_mount,
	.kill_sb =	kill_anon_super,
};

/*
 * Super operations handed to mount_pseudo_xattr() by sockfs_mount().
 * sock_alloc_inode and sock_destroy_inode are defined outside this excerpt.
 */
static const struct super_operations sockfs_ops = {
	.alloc_inode	= sock_alloc_inode,
	.destroy_inode	= sock_destroy_inode,
	.statfs		= simple_statfs,
};

从代码中得知,伪文件系统名为sockfs,挂载点为socket:。

创建socket

/* socket(2) system call entry: a thin wrapper around __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	return __sys_socket(family, type, protocol);
}

/*
 * Create a socket and bind it to a new file descriptor.
 *
 * The bits of @type outside SOCK_TYPE_MASK are treated as flags; only
 * SOCK_CLOEXEC and SOCK_NONBLOCK are accepted, anything else yields -EINVAL.
 * Returns the new fd on success, or the negative error from sock_create() /
 * sock_map_fd().
 */
int __sys_socket(int family, int type, int protocol)
{
	int retval;
	struct socket *sock;
	int flags;

	/* Check the SOCK_* constants for consistency.  */
	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

	/* Split the combined argument into flag bits and the pure type. */
	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	/* Where the two constants differ, translate SOCK_NONBLOCK to O_NONBLOCK. */
	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	retval = sock_create(family, type, protocol, &sock);
	if (retval < 0)
		return retval;

	/* sock_map_fd() releases the socket itself on its failure paths. */
	return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}

/*
 * Convenience wrapper: calls __sock_create() with the network namespace of
 * the current task (current->nsproxy->net_ns) and kern == 0.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
EXPORT_SYMBOL(sock_create);

/*
 * Allocate a socket and let the protocol family initialise it (excerpt;
 * "..." marks code elided by the article, including the declarations and
 * the out_* error labels referenced below).
 *
 * On success stores the new socket in *res and returns 0. Visible failure
 * codes: -ENFILE when sock_alloc() fails, -EAFNOSUPPORT when no family is
 * registered for @family or its module reference cannot be taken, or the
 * error returned by pf->create() / security_socket_post_create().
 */
int __sock_create(struct net *net, int family, int type, int protocol,
			 struct socket **res, int kern)
{
	...
	sock = sock_alloc();
	if (!sock) {
		net_warn_ratelimited("socket: no more sockets\n");
		return -ENFILE;	/* Not exactly a match, but its the
				   closest posix thing */
	}

	sock->type = type;

#ifdef CONFIG_MODULES
	/* Attempt to load a protocol module if the find failed.
	 *
	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
	 * requested real, full-featured networking support upon configuration.
	 * Otherwise module support will break!
	 */
	if (rcu_access_pointer(net_families[family]) == NULL)
		request_module("net-pf-%d", family);
#endif

	rcu_read_lock();
	pf = rcu_dereference(net_families[family]);
	/* Preset the error so both "goto out_release" paths below report it. */
	err = -EAFNOSUPPORT;
	if (!pf)
		goto out_release;

	/*
	 * We will call the ->create function, that possibly is in a loadable
	 * module, so we have to bump that loadable module refcnt first.
	 */
	if (!try_module_get(pf->owner))
		goto out_release;

	/* Now protected by module ref count */
	rcu_read_unlock();

	err = pf->create(net, sock, protocol, kern);
	if (err < 0)
		goto out_module_put;

	/*
	 * Now to bump the refcnt of the [loadable] module that owns this
	 * socket at sock_release time we decrement its refcnt.
	 */
	if (!try_module_get(sock->ops->owner))
		goto out_module_busy;

	/*
	 * Now that we're done with the ->create function, the [loadable]
	 * module can have its refcnt decremented
	 */
	module_put(pf->owner);
	err = security_socket_post_create(sock, family, type, protocol, kern);
	if (err)
		goto out_sock_release;
	*res = sock;

	return 0;
	...
}
EXPORT_SYMBOL(__sock_create);

/*
 * Allocate a new inode on the sockfs superblock (sock_mnt->mnt_sb) and
 * return the socket obtained from it via SOCKET_I(). The inode is marked as
 * a socket (S_IFSOCK) with rwx for user/group/other and is owned by the
 * current fsuid/fsgid. Returns NULL if the inode cannot be allocated.
 */
struct socket *sock_alloc(void)
{
	struct inode *inode;
	struct socket *sock;

	inode = new_inode_pseudo(sock_mnt->mnt_sb);
	if (!inode)
		return NULL;

	sock = SOCKET_I(inode);

	inode->i_ino = get_next_ino();
	inode->i_mode = S_IFSOCK | S_IRWXUGO;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_op = &sockfs_inode_ops;

	return sock;
}
EXPORT_SYMBOL(sock_alloc);

/*
 * Reserve an unused fd, wrap @sock in a struct file and install the file at
 * that fd. Returns the fd, or a negative error.
 *
 * The socket is released on both failure paths: directly here when no fd is
 * available, and inside sock_alloc_file() when the file cannot be created
 * (which is why only put_unused_fd() is needed in that case).
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	newfile = sock_alloc_file(sock, flags, NULL);
	if (likely(!IS_ERR(newfile))) {
		fd_install(fd, newfile);
		return fd;
	}

	put_unused_fd(fd);
	return PTR_ERR(newfile);
}

/*
 * Create the struct file that represents @sock, using the socket's inode,
 * the sockfs mount and socket_file_ops. Of @flags only O_NONBLOCK is carried
 * into the file flags (together with O_RDWR).
 *
 * When @dname is NULL the name defaults to sk_prot_creator->name if the
 * socket has an sk, otherwise to the empty string.
 *
 * On success links both directions (sock->file and file->private_data) and
 * returns the file. On failure releases @sock and returns the error pointer
 * from alloc_file_pseudo().
 */
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
	struct file *file;

	if (!dname)
		dname = sock->sk ? sock->sk->sk_prot_creator->name : "";

	file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
				O_RDWR | (flags & O_NONBLOCK),
				&socket_file_ops);
	if (IS_ERR(file)) {
		sock_release(sock);
		return file;
	}

	sock->file = file;
	file->private_data = sock;
	return file;
}
EXPORT_SYMBOL(sock_alloc_file);

用户通过socket这个syscall创建socket,在sockfs上使创建的socket与某个file关联(file->private_data = sock),并返回fd。这个fd通过file对象关联着socket,从而socket可以像文件一样被操作。

读写socket

static const struct file_operations socket_file_ops = {	.owner =	THIS_MODULE,	.llseek =	no_llseek,	.read_iter =	sock_read_iter,	.write_iter =	sock_write_iter,	.poll =		sock_poll,	.unlocked_ioctl = sock_ioctl,#ifdef CONFIG_COMPAT	.compat_ioctl = compat_sock_ioctl,#endif	.mmap =		sock_mmap,	.release =	sock_close,	.fasync =	sock_fasync,	.sendpage =	sock_sendpage,	.splice_write = generic_splice_sendpage,	.splice_read =	sock_splice_read,};static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to){	struct file *file = iocb->ki_filp;	struct socket *sock = file->private_data;	struct msghdr msg = {.msg_iter = *to,			     .msg_iocb = iocb};	ssize_t res;	if (file->f_flags & O_NONBLOCK)		msg.msg_flags = MSG_DONTWAIT;	if (iocb->ki_pos != 0)		return -ESPIPE;	if (!iov_iter_count(to))	/* Match SYS5 behaviour */		return 0;	res = sock_recvmsg(sock, &msg, msg.msg_flags);	*to = msg.msg_iter;	return res;}static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from){	struct file *file = iocb->ki_filp;	struct socket *sock = file->private_data;	struct msghdr msg = {.msg_iter = *from,			     .msg_iocb = iocb};	ssize_t res;	if (iocb->ki_pos != 0)		return -ESPIPE;	if (file->f_flags & O_NONBLOCK)		msg.msg_flags = MSG_DONTWAIT;	if (sock->type == SOCK_SEQPACKET)		msg.msg_flags |= MSG_EOR;	res = sock_sendmsg(sock, &msg);	*from = msg.msg_iter;	return res;}static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,				     int flags){	return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);}int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags){	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);	return err ?: sock_recvmsg_nosec(sock, msg, flags);}EXPORT_SYMBOL(sock_recvmsg);static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,				     int flags){	return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);}int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags){	int err = 
security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);	return err ?: sock_recvmsg_nosec(sock, msg, flags);}EXPORT_SYMBOL(sock_recvmsg);

通过sock->ops实现特定协议的读写。

转载于:https://my.oschina.net/guzhou/blog/3001974

你可能感兴趣的文章
【转载】NIO客户端序列图
查看>>
poj_2709 贪心算法
查看>>
【程序员眼中的统计学(11)】卡方分布的应用
查看>>
文件夹工具类 - FolderUtils
查看>>
http://blog.csdn.net/huang_xw/article/details/7090173
查看>>
lua学习例子
查看>>
研究:印度气候变暖速度加剧 2040年或面临重灾
查看>>
python爬虫——爬取豆瓣TOP250电影
查看>>
C++与Rust操作裸指针的比较
查看>>
了解webpack-4.0版本(一)
查看>>
如何培养良好的编程风格
查看>>
Netty Channel源码分析
查看>>
基于 HTML5 WebGL 的 3D 机房
查看>>
Java编程——数据库两大神器:索引和锁
查看>>
springMvc学习笔记(2)
查看>>
吐槽Javascript系列二:数组中的splice和slice方法
查看>>
什么是Javascript函数节流?
查看>>
MQ框架的比较
查看>>
oschina
查看>>
Octave 入门
查看>>