# linux 1.0 notes

## net/inet/skbuff

```c
/*
 * Descriptor for one network buffer.  The zero-length data[] member at
 * the end marks where the bytes that follow this fixed header begin.
 */
struct sk_buff {
  unsigned long			magic_debug_cookie;
  /* Links to other sk_buffs. */
  struct sk_buff		*volatile next;
  struct sk_buff		*volatile prev;
  struct sk_buff		*volatile link3;
  /* Pointer to a volatile sk_buff pointer -- presumably the head of the
     queue this buffer is on (TODO confirm). */
  struct sk_buff		*volatile* list;
  struct sock			*sk;
  volatile unsigned long	when;	/* used to compute rtt's	*/
  struct device			*dev;
  void				*mem_addr;
  /* One storage slot viewed as a pointer to different header types;
     seq overlays the same storage as an integer. */
  union {
	struct tcphdr	*th;
	struct ethhdr	*eth;
	struct iphdr	*iph;
	struct udphdr	*uh;
	struct arphdr	*arp;
	unsigned char	*raw;
	unsigned long	seq;
#ifdef CONFIG_IPX	
	ipx_packet	*ipx;
#endif	
  } h;
  struct iphdr		*ip_hdr;		/* For IPPROTO_RAW */
  unsigned long			mem_len;
  unsigned long 		len;
  unsigned long			fraglen;
  struct sk_buff		*fraglist;	/* Fragment list */
  unsigned long			truesize;
  unsigned long 		saddr;
  unsigned long 		daddr;
  int				magic;
  volatile char 		acked,
				used,
				free,
				arp;
  unsigned char			tries,lock;	/* Lock is now unused */
  unsigned short		users;		/* User count - see datagram.c (and soon seqpacket.c/stream.c) */
  /* Zero-length arrays: they add no storage of their own. */
  unsigned long			padding[0];
  unsigned char			data[0];
};
```

* The last field, `data[0]`, is a zero-length array that marks the start of the data buffer, so this struct has a dynamic size.

* 
![skbuff01.png](imgs/skbuff01.png)

------------

```c
/*
 *	Get a clone of an sk_buff. This is the safe way to peek at
 *	a socket queue without accidents. It's a bit long but most
 *	of it actually ends up as tiny bits of inline assembler
 *	anyway. Only the memcpy of up to 4K with ints off is not
 *	as nice as I'd like.
 *
 *	Returns NULL when skb_peek() finds nothing on the list or when
 *	alloc_skb() fails; otherwise returns the newly allocated copy.
 *
 *	NOTE(review): the "go around another time" path does not retry.
 *	`continue` inside do { } while(0) jumps to the loop condition,
 *	which is 0, so control leaves the loop.  On that path
 *	restore_flags() runs a second time and newsk is returned after
 *	it has already been passed to kfree_skb().
 */

struct sk_buff *skb_peek_copy(struct sk_buff *volatile* list)
{
	struct sk_buff *orig,*newsk;
	unsigned long flags;
	unsigned int len;
	/* Now for some games to avoid races */

	do
	{
		/* Interrupts off while the head of the list is examined. */
		save_flags(flags);
		cli();
		orig=skb_peek(list);
		if(orig==NULL)
		{
			restore_flags(flags);
			return NULL;
		}
		IS_SKB(orig);
		len=orig->truesize;
		/* Restore the saved flags before the allocation, which may sleep. */
		restore_flags(flags);

		newsk=alloc_skb(len,GFP_KERNEL);	/* May sleep */

		if(newsk==NULL)		/* Oh dear... not to worry */
			return NULL;

		/* Re-check the head: it may have changed while we slept. */
		save_flags(flags);
		cli();
		if(skb_peek(list)!=orig)	/* List changed go around another time */
		{
			restore_flags(flags);
			/* Fill in the fields set before handing the unused buffer to kfree_skb(). */
			newsk->sk=NULL;
			newsk->free=1;
			newsk->mem_addr=newsk;
			newsk->mem_len=len;
			kfree_skb(newsk, FREE_WRITE);
			/* NOTE(review): this exits the loop rather than restarting it. */
			continue;
		}

		IS_SKB(orig);
		IS_SKB(newsk);
		/* Copy len (= orig->truesize) bytes, then fix up the fields that
		   must not be shared with the original. */
		memcpy(newsk,orig,len);
		newsk->list=NULL;
		newsk->magic=0;
		newsk->next=NULL;
		newsk->prev=NULL;
		newsk->mem_addr=newsk;
		/* Shift the header pointer by the distance between the two buffers. */
		newsk->h.raw+=((char *)newsk-(char *)orig);
		newsk->link3=NULL;
		newsk->sk=NULL;
		newsk->free=1;
	}
	while(0);

	restore_flags(flags);
	return(newsk);
}

```

* There are a lot of `cli();` calls in this file. `cli()` clears the interrupt flag (disables interrupts); it is called explicitly to make sure the flag really is cleared, because some functions call `sti()` internally.

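* `do{...}while(0)` is a nice trick. We can use `continue` inside it: `continue` jumps to the loop condition, which is `0`, so control leaves the loop without needing a `break`. Note that this means the "go around another time" branch above does not actually retry; it exits the loop and the function returns `newsk` after it has already been passed to `kfree_skb()`.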

* `newsk=alloc_skb(len,GFP_KERNEL);	/* May sleep */` When allocating memory, the kernel may have to swap out some memory pages; writing them to disk can put the caller to sleep.

* volatile and pointer

![skbuff02.png](imgs/skbuff02.png)
![skbuff03.png](imgs/skbuff03.png)

So `struct sk_buff *volatile* list` means

`*list` (the `sk_buff` pointer) is volatile, but the pointer to that pointer, `list` itself, is not, and neither is the `sk_buff` object `**list`.

------------

## net/inet/sock

```c
/* Number of slots in proto.sock_array.  A power of two, so that
   num & (SOCK_ARRAY_SIZE - 1) selects a slot. */
#define SOCK_ARRAY_SIZE	64

/* Per-protocol descriptor (abridged in these notes; "...." marks omitted members). */
struct proto {
  struct sk_buff *	(*wmalloc)(struct sock *sk,
				    unsigned long size, int force,
				    int priority);
  ....

  /* Heads of the per-slot chains of socks, linked through sock->next. */
  struct sock *		sock_array[SOCK_ARRAY_SIZE];
  char			name[80];
};

```

* `struct sock *		sock_array[SOCK_ARRAY_SIZE];` this is a hash table to record which ports are used. The key is port number.

```c
/*
 * Report whether some sock registered with `prot` already has number `num`.
 * Walks the chain in slot num & (SOCK_ARRAY_SIZE - 1) and compares sk->num.
 * Returns 1 if a match is found, 0 otherwise.
 */
static int
sk_inuse(struct proto *prot, int num)
{
  struct sock *sk;

  for(sk = prot->sock_array[num & (SOCK_ARRAY_SIZE -1 )];
      sk != NULL;
      sk=sk->next) {
	if (sk->num == num) return(1);
  }
  return(0);
}
```

This function checks whether a port is already in use. It walks the linked list in the port's slot, which is the classic chaining method of resolving hash collisions.

-------------

```c
#define PROT_SOCK	1024	/* Sockets 0-1023 can't be bound too unless you are superuser */


/*
 * Pick a socket number for `prot`.
 *
 * If `base` is 0 or not above PROT_SOCK it is moved above PROT_SOCK,
 * offset by the rotating static `start`.  The SOCK_ARRAY_SIZE slots are
 * then scanned beginning at base+1: the first empty slot wins
 * immediately.  If every slot is occupied, the slot with the shortest
 * chain is chosen and the candidate number is advanced in steps of
 * SOCK_ARRAY_SIZE (staying in the same slot) until sk_inuse() reports
 * it free.
 */
unsigned short
get_new_socknum(struct proto *prot, unsigned short base)
{
  static int start=0;

  /*
   * Used to cycle through the port numbers so the
   * chances of a confused connection drop.
   */
  int i, j;
  int best = 0;
  int size = 32767; /* a big num. */
  struct sock *sk;

  if (base == 0) base = PROT_SOCK+1+(start % 1024);
  if (base <= PROT_SOCK) {
	base += PROT_SOCK+(start % 1024);
  }

  /* Now look through the entire array and try to find an empty ptr. */
  for(i=0; i < SOCK_ARRAY_SIZE; i++) {
	/* j counts the entries chained in this slot. */
	j = 0;
	sk = prot->sock_array[(i+base+1) &(SOCK_ARRAY_SIZE -1)];
	while(sk != NULL) {
		sk = sk->next;
		j++;
	}
	if (j == 0) {
		/* Empty slot: advance the rotating start and use this number. */
		start =(i+1+start )%1024;
		DPRINTF((DBG_INET, "get_new_socknum returning %d, start = %d\n",
							i + base + 1, start));
		return(i+base+1);
	}

  /* xitongsys
  Remember the slot with the shortest linked list seen so far.
  */
	if (j < size) {
		best = i;
		size = j;
	}
  }

  /* Now make sure the one we want is not in use. */
  while(sk_inuse(prot, base +best+1)) {
	/* Adding SOCK_ARRAY_SIZE keeps the number in the same slot. */
	best += SOCK_ARRAY_SIZE;
  }
  DPRINTF((DBG_INET, "get_new_socknum returning %d, start = %d\n",
						best + base + 1, start));
  return(best+base+1);
}
```

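* Each protocol has an array of sock pointers, `sock_array`, with `SOCK_ARRAY_SIZE` (64) slots. This array is a hash table recording the ports in use for that protocol, and hash collisions are resolved by chaining the socks in a linked list. (`PROT_SOCK` is a different constant: the boundary below which ports are reserved for the superuser.)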

------------

```c
/*
 * Register `sk` under number `num` in its protocol's sock_array.
 *
 * Sets sk->num, then inserts sk into the chain of slot
 * num & (SOCK_ARRAY_SIZE - 1).  An empty slot simply receives sk.
 * Otherwise a mask is derived from sk->saddr and sk is inserted in
 * front of the first chained sock whose saddr has no bits under that
 * mask, or appended at the end if there is none.
 *
 * Uses cli()/sti() directly, so every exit path leaves interrupts
 * enabled.
 */
void
put_sock(unsigned short num, struct sock *sk)
{
  struct sock *sk1;
  struct sock *sk2;
  int mask;

  DPRINTF((DBG_INET, "put_sock(num = %d, sk = %X\n", num, sk));
  sk->num = num;
  sk->next = NULL;
  /* From here on num is the slot index, not the socket number. */
  num = num &(SOCK_ARRAY_SIZE -1);

  /* We can't have an interrupt re-enter here. */
  cli();
  if (sk->prot->sock_array[num] == NULL) {
	sk->prot->sock_array[num] = sk;
	sti();
	return;
  }
  /* NOTE(review): interrupts are enabled again from here until the
     cli() below, so the slot can change between the check above and
     the insertion. */
  sti();

  /* xitongsys
    addr is in big endian order. So this mask is not the subnet mask (it is actually reversed)
  */
  /* Widen the mask one byte per step; stop at the first step where the
     masked saddr is neither zero nor all ones, then shift the mask
     left by 8. */
  for(mask = 0xff000000; mask != 0xffffffff; mask = (mask >> 8) | mask) {
	if ((mask & sk->saddr) &&
	    (mask & sk->saddr) != (mask & 0xffffffff)) {
		mask = mask << 8;
		break;
	}
  }
  DPRINTF((DBG_INET, "mask = %X\n", mask));

  cli();
  sk1 = sk->prot->sock_array[num];

  /* xitongsys
  the list is sorted by the addr
  */
  /* sk2 walks the chain; sk1 trails one node behind it. */
  for(sk2 = sk1; sk2 != NULL; sk2=sk2->next) {
	if (!(sk2->saddr & mask)) {
		if (sk2 == sk1) {
			/* Match is the first node: sk becomes the new head. */
			sk->next = sk->prot->sock_array[num];
			sk->prot->sock_array[num] = sk;
			sti();
			return;
		}
		/* Link sk between the previous node (sk1) and sk2. */
		sk->next = sk2;
		sk1->next= sk;
		sti();
		return;
	}
	sk1 = sk2;
  }

  /* Goes at the end. */
  sk->next = NULL;
  sk1->next = sk;
  sti();
}
```

--------------

```c
/*
 * Put the sock behind `sock` into the TCP_LISTEN state.
 *
 * If the sock has no number yet, one is obtained from
 * get_new_socknum() and registered with put_sock().  `backlog` is
 * stored in sk->max_ack_backlog.
 *
 * Returns 0 on success (and also, after printing a warning, when
 * sock->data is NULL); returns -EAGAIN if no socket number could be
 * obtained.
 */
static int
inet_listen(struct socket *sock, int backlog)
{
  struct sock *sk;

  sk = (struct sock *) sock->data;
  if (sk == NULL) {
	printk("Warning: sock->data = NULL: %d\n" ,__LINE__);
	return(0);
  }

  /* We may need to bind the socket. */
  if (sk->num == 0) {
	sk->num = get_new_socknum(sk->prot, 0);
	if (sk->num == 0) return(-EAGAIN);
	put_sock(sk->num, sk);
	/* NOTE(review): sk->num looks like a host-order value being stored
	   into a header field, so htons() would be the conventional name;
	   the byte swap performed is the same -- confirm. */
	sk->dummy_th.source = ntohs(sk->num);
  }

  /* We might as well re use these. */ 
  sk->max_ack_backlog = backlog;
  /* ack_backlog is only reset when the sock was not already listening. */
  if (sk->state != TCP_LISTEN) {
	sk->ack_backlog = 0;
	sk->state = TCP_LISTEN;
  }
  return(0);
}

```

* `max_ack_backlog` is the maximum length of the accept queue; it is set from the `backlog` argument of `listen`.

---------------

```c
/* The peer socket should always be NULL. */
/*
 * Release the sock attached to `sock`.
 *
 * Calls the sock's state_change hook and the protocol's close().
 * With linger set, it then sleeps on sk->sleep until the state
 * becomes TCP_CLOSE, current->timeout is no longer positive, or an
 * unblocked signal is pending.  Finally the sock is marked dead and
 * in use, release_sock() is called and sock->data is cleared.
 *
 * Always returns 0.
 */
static int
inet_release(struct socket *sock, struct socket *peer)
{
  struct sock *sk;

  sk = (struct sock *) sock->data;
  if (sk == NULL) return(0);

  DPRINTF((DBG_INET, "inet_release(sock = %X, peer = %X)\n", sock, peer));
  sk->state_change(sk);

  /* Start closing the connection.  This may take a while. */
  /*
   * If linger is set, we don't return until the close
   * is complete.  Other wise we return immediately. The
   * actually closing is done the same either way.
   */
  if (sk->linger == 0) {
	sk->prot->close(sk,0);
	sk->dead = 1;
  } else {
	DPRINTF((DBG_INET, "sk->linger set.\n"));
	sk->prot->close(sk, 0);
	cli();

    /* xitongsys
    lingertime to wait
    */
	/* NOTE(review): when lingertime is 0 the timeout is not set here,
	   so the loop test uses whatever current->timeout already holds. */
	if (sk->lingertime)
		current->timeout = jiffies + HZ*sk->lingertime;
	while(sk->state != TCP_CLOSE && current->timeout>0) {
		interruptible_sleep_on(sk->sleep);
		/* Stop waiting if a signal that is not blocked is pending. */
		if (current->signal & ~current->blocked) {
			break;
#if 0
			/* not working now - closes can't be restarted */
			sti();
			current->timeout=0;
			return(-ERESTARTSYS);
#endif
		}
	}
	current->timeout=0;
	sti();
	sk->dead = 1;
  }
  sk->inuse = 1;

  /* This will destroy it. */
  release_sock(sk);
  sock->data = NULL;
  DPRINTF((DBG_INET, "inet_release returning\n"));
  return(0);
}

```

* TIME_WAIT and linger

![linger01.png](imgs/linger01.png)
![linger02.png](imgs/linger02.png)
![linger03.png](imgs/linger03.png)

-------

## net/inet/ip

```c
/* IP flags. */
/* The three flag bits plus the 13-bit IP_OFFSET mask together cover
   16 bits -- presumably the frag_off field of struct iphdr (confirm
   against the code that uses them). */
#define IP_CE		0x8000		/* Flag: "Congestion"		*/
#define IP_DF		0x4000		/* Flag: "Don't Fragment"	*/
#define IP_MF		0x2000		/* Flag: "More Fragments"	*/
#define IP_OFFSET	0x1FFF		/* "Fragment Offset" part	*/
```

![ip01](imgs/ip01.png)
![ip02](imgs/ip02.png)
![ip03](imgs/ip03.png)

* The fragment offset is measured in units of 8 bytes.

------------

```c
/* Fixed part of the IPv4 header; any options follow the last member. */
struct iphdr {
  /* Two 4-bit fields sharing one byte.  NOTE(review): declaring ihl
     before version presumably matches the bit-field layout of the
     target compiler/CPU -- confirm. */
  unsigned char		ihl:4,
			version:4;
  unsigned char		tos;
  unsigned short	tot_len;
  unsigned short	id;
  unsigned short	frag_off;
  unsigned char		ttl;
  unsigned char		protocol;
  unsigned short	check;
  unsigned long		saddr;
  unsigned long		daddr;
  /*The options start here. */
};
```

* IPv4 header

![ip04](imgs/ip04.png)

-----------------