日期:2014-05-16  浏览次数:20942 次

Linux协议栈之BSD和INET socket层(一)

Linux的BSD和INET? socket层分别对应于ISO的表示层和会话层,其中有两个比较重要的数据结构:

struct socket
{
	socket_state		state;

	unsigned long		flags;
	struct proto_ops	*ops;
	struct inode		*inode;
	struct fasync_struct	*fasync_list;	/* Asynchronous wake up list	*/
	struct file		*file;		/* File back pointer for gc	*/
	struct sock		*sk;
	wait_queue_head_t	wait;

	short			type;
	unsigned char		passcred;
};

?

其中state表示socket当前的状态,file和inode指向文件系统的文件描述符和节点信息。每个文件描述符都要对应一个inode节点,其中file->f_inode指向inode节点。

?

struct proto_ops *ops是一个L6层的处理函数,pops数组将在sock_register中初始化,对应于不同的作用域将被初始化为不同的值。fasync_list用于异步唤醒,对应的函数是sock_wake_async()。

?

而struct sock *sk 就是我们要说的另外一个非常重要的数据结构,相对于socket只应用于L6层,sock贯穿于L2~L5层并且做过各层之间贯穿的一个纽带。

?

struct sock {
	/* Socket demultiplex comparisons on incoming packets. */
	__u32			daddr;		/* Foreign IPv4 addr			*/
	__u32			rcv_saddr;	/* Bound local IPv4 addr		*/
	__u16			dport;		/* Destination port			*/
	unsigned short		num;		/* Local port				*/
	int			bound_dev_if;	/* Bound device index if != 0		*/

	/* Main hash linkage for various protocol lookup tables. */
	struct sock		*next;
	struct sock		**pprev;
	struct sock		*bind_next;
	struct sock		**bind_pprev;

	volatile unsigned char	state,		/* Connection state			*/
				zapped;		/* In ax25 & ipx means not linked	*/
	__u16			sport;		/* Source port				*/

	unsigned short		family;		/* Address family			*/
	unsigned char		reuse;		/* SO_REUSEADDR setting			*/
	unsigned char		shutdown;
	atomic_t		refcnt;		/* Reference count			*/

	socket_lock_t		lock;		/* Synchronizer...			*/
	int			rcvbuf;		/* Size of receive buffer in bytes	*/

	wait_queue_head_t	*sleep;		/* Sock wait queue			*/
	struct dst_entry	*dst_cache;	/* Destination cache			*/
	rwlock_t		dst_lock;
	atomic_t		rmem_alloc;	/* Receive queue bytes committed	*/
	struct sk_buff_head	receive_queue;	/* Incoming packets			*/
	atomic_t		wmem_alloc;	/* Transmit queue bytes committed	*/
	struct sk_buff_head	write_queue;	/* Packet sending queue			*/
	atomic_t		omem_alloc;	/* "o" is "option" or "other" */
	int			wmem_queued;	/* Persistent queue size */
	int			forward_alloc;	/* Space allocated forward. */
	__u32			saddr;		/* Sending source			*/
	unsigned int		allocation;	/* Allocation mode			*/
	int			sndbuf;		/* Size of send buffer in bytes		*/
	struct sock		*prev;

	/* Not all are volatile, but some are, so we might as well say they all are.
	 * XXX Make this a flag word -DaveM
	 */
	volatile char		dead,
				done,
				urginline,
				keepopen,
				linger,
				destroy,
				no_check,
				broadcast,
				bsdism;
	unsigned char		debug;
	unsigned char		rcvtstamp;
	unsigned char		use_write_queue;
	unsigned char		userlocks;
	/* Hole of 3 bytes. Try to pack. */
	int			route_caps;
	int			proc;
	unsigned long	        lingertime;

	int			hashent;
	struct sock		*pair;

	/* The backlog queue is special, it is always used with
	 * the per-socket spinlock held and requires low latency
	 * access.  Therefore we special case it's implementation.
	 */
	struct {
		struct sk_buff *head;
		struct sk_buff *tail;
	} backlog;

	rwlock_t		callback_lock;

	/* Error queue, rarely used. */
	struct sk_buff_head	error_queue;

	struct proto		*prot;

#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
	union {
		struct ipv6_pinfo	af_inet6;
	} net_pinfo;
#endif

	union {
		struct tcp_opt		af_tcp;
#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
		struct raw_opt		tp_raw4;
#endif
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		struct raw6_opt		tp_raw;
#endif /* CONFIG_IPV6 */
#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
		struct spx_opt		af_spx;
#endif /* CONFIG_SPX */

	} tp_pinfo;

	int			err, err_soft;	/* Soft holds errors that don't
						   cause failure but are the cause
						   of a persistent failure not just
						   'timed out' */
	unsigned short		ack_backlog;
	unsigned short		max_ack_backlog;
	__u