# my notes

## gcc asm

asm ( "movl $1, %eax\n\t"  
      "movl $0, %ebx\n\t"  
      "int  $0x80" );  

3）volatile修饰符
编译器会试图优化生成的汇编代码以提高性能。但对内联汇编来说，优化有时并不是好事。如果不希望编译器处理内联汇编代码，可以明确地说明。用volatile修饰符可以完成这个请求：
asm volatile ("assembly code");

asm ( 汇编语句  
    : 输出操作数     // 非必需  
    : 输入操作数     // 非必需  
    : 其他被污染的寄存器 // 非必需  
    );  

```c
#include <stdio.h>  
  
int main()  
{  
    int a=1, b=2, c=0;  
  
    // 蛋疼的 add 操作  
    asm(  
        "addl %2, %0"       // 1  
        : "=g"(c)           // 2  
        : "0"(a), "g"(b)    // 3  
        : "memory");        // 4  
  
    printf("现在c是:%d\n", c);  
    return 0;  
}
```
第1行是汇编语句，用双引号引起来， 多条语句用 ; 或者 \n\t 来分隔。
第2行是输出操作数，都是 "=?"(var) 的形式， var 可以是任意内存变量（输出结果会存到这个变量中）， ? 一般是下面这些标识符（表示内联汇编中用什么来代理这个操作数）：

a,b,c,d,S,D 分别代表 eax,ebx,ecx,edx,esi,edi 寄存器
r 上面的寄存器的任意一个（谁闲着就用谁）
m 内存
i 立即数（常量，只用于输入操作数）
g 寄存器、内存、立即数 都行（gcc你看着办）
在汇编中用 %序号 来代表这些输入/输出操作数，序号从 0 开始。为了与操作数区分开来，寄存器用两个%引出，如：%%eax

第3行是输入操作数，都是 "?"(var) 的形式， ? 除了可以是上面的那些标识符，还可以是输出操作数的序号，表示用 var 来初始化该输出操作数，上面的程序中 %0 和 %1 就是一个东西，初始化为 1（a的值）。
第4行标出那些在汇编代码中修改了的、又没有在输入/输出列表中列出的寄存器，这样 gcc 就不会擅自使用这些"危险的"寄存器。还可以用 "memory" 表示在内联汇编中修改了内存，之前缓存在寄存器中的内存变量需要重新读取

# GDT/LDT

![gdt01.png](imgs/gdt01.png)
![gdt02.png](imgs/gdt02.png)
![gdt03.png](imgs/gdt03.png)
![gdt04.png](imgs/gdt04.png)
![gdt05.png](imgs/gdt05.png)
![gdt06.png](imgs/gdt06.png)
![gdt07.png](imgs/gdt07.png)

* every LDT has a descriptor entry in the GDT. The descriptor holds the LDT's base address and limit

* LLDT loads the Local Descriptor Table register (LDTR). The word operand (memory or register) to LLDT should contain a **selector** to the Global Descriptor Table (GDT). The GDT entry should be a Local Descriptor Table. If so, then the LDTR is loaded from the entry. The descriptor registers DS, ES, SS, FS, GS, and CS are not affected. The LDT field in the task state segment does not change.

## paging in 386

![paging01.png](imgs/paging01.png)
![paging02.png](imgs/paging02.png)

* Level 1 is the page directory, which occupies one page (4K) and contains 1024 page directory entries (4B each).

* Level 2 is the page tables, each of which occupies one page (4K) and contains 1024 page table entries (4B each).

* So in total there are 1024 * 1024 pages * 4K = 4G of addressable memory

## IDT (Interrupt Descriptor Table)

![idt01.png](imgs/idt01.png)
![idt02.png](imgs/idt02.png)



## Task in 386

![task01.png](imgs/task01.png)
![task02.png](imgs/task02.png)


## Privilege levels

![privilege01.png](imgs/privilege01.png)
![privilege02.png](imgs/privilege02.png)

## kernel/hd.c

```c
/*
 * add-request adds a request to the linked list.
 * It sets the 'sorting'-variable when doing something
 * that interrupts shouldn't touch.
 */
static void add_request(struct hd_request * req)
{
	struct hd_request * tmp;

	if (req->nsector != 2)
		panic("nsector!=2 not implemented");
/*
 * Not to mess up the linked lists, we never touch the two first
 * entries (not this_request, as it is used by current interrups,
 * and not this_request->next, as it can be assigned to this_request).
 * This is not too high a price to pay for the ability of not
 * disabling interrupts.
 */
	sorting=1;
	if (!(tmp=this_request))
		this_request=req;
	else {
		if (!(tmp->next))
			tmp->next=req;
		else {
			tmp=tmp->next;
			for ( ; tmp->next ; tmp=tmp->next)
				if ((IN_ORDER(tmp,req) ||
				    !IN_ORDER(tmp,tmp->next)) &&
				    IN_ORDER(req,tmp->next))
					break;
			req->next=tmp->next;
			tmp->next=req;
		}
	}
	sorting=0;
/*
 * NOTE! As a result of sorting, the interrupts may have died down,
 * as they aren't redone due to locking with sorting=1. They might
 * also never have started, if this is the first request in the queue,
 * so we restart them if necessary.
 */
	if (!do_hd)
		do_request();
}
```
* Nice linked-list operations
* add_request may be interrupted, so the first two entries are never touched
* IN_ORDER sorts by hd, cyl, head, sector, which implements the elevator algorithm


## kernel/sched.c

```c
union task_union {
	struct task_struct task;
	char stack[PAGE_SIZE];
};

static union task_union init_task = {INIT_TASK,};

long volatile jiffies=0;
long startup_time=0;
struct task_struct *current = &(init_task.task), *last_task_used_math = NULL;

struct task_struct * task[NR_TASKS] = {&(init_task.task), };
```

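* task_union overlays the task_struct with a PAGE_SIZE `stack` array, so each task occupies (at least) PAGE_SIZE bytes, i.e. sizeof(union task_union) >= PAGE_SIZE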

-----------------------------

```c
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;
	*p = current;
	current->state = TASK_UNINTERRUPTIBLE;
	schedule();
	if (tmp)
		tmp->state=0;
}
```

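* this code creates an implicit linked list of sleeping processes, like `hard_disk <- p0 <- p1 <- p2`: each sleeper remembers the previous waiter in its local `tmp`.

`*p = current` makes the wait pointer refer to the current process. The first process woken up is p2, which then wakes p1 (`tmp->state=0`), and so on.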

------------------------------------

```c
void do_timer(long cpl)
{
	if (cpl)
		current->utime++;
	else
		current->stime++;
	if ((--current->counter)>0) return;
	current->counter=0;
	if (!cpl) return;
	schedule();
}
```

1. cpl (Current Privilege Level): 0 means the tick is counted as system time (`stime`); > 0 means it is counted as user time (`utime`)

## boot/boot.s

```asm
The area left over in the lower 640 kB is meant
| for these. No other memory is assumed to be "physical", ie all memory
| over 1Mb is demand-paging. All addresses under 1Mb are guaranteed to match
| their physical addresses.
|
| NOTE1 abouve is no longer valid in it's entirety. cache-memory is allocated
| above the 1Mb mark as well as below. Otherwise it is mainly correct.
```

1. real-mode x86 address = segment * 16 + offset

----------------------

```asm
gdt:
	.word	0,0,0,0		| dummy

	.word	0x07FF		| 8Mb - limit=2047 (2048*4096=8Mb)
	.word	0x0000		| base address=0
	.word	0x9A00		| code read/exec
	.word	0x00C0		| granularity=4096, 386

	.word	0x07FF		| 8Mb - limit=2047 (2048*4096=8Mb)
	.word	0x0000		| base address=0
	.word	0x9200		| data read/write
	.word	0x00C0		| granularity=4096, 386
```

1. These are dummy GDT entries that are only used during boot to switch from real mode to protected mode. They just map the lower 8Mb of addresses to the lower 8Mb of physical memory (base=0, limit=8Mb)

2. two gdt entries, one for code segment and one for data segment



## boot/head.s

```asm
/*
 *  setup_gdt
 *
 *  This routines sets up a new gdt and loads it.
 *  Only two entries are currently built, the same
 *  ones that were built in init.s. The routine
 *  is VERY complicated at two whole lines, so this
 *  rather long comment is certainly needed :-).
 *  This routine will beoverwritten by the page tables.
 */
setup_gdt:
	lgdt gdt_descr
	ret

.org 0x1000
pg0:

.org 0x2000
pg1:

.org 0x3000
pg2:		# This is not used yet, but if you
		# want to expand past 8 Mb, you'll have
		# to use it.

.org 0x4000
```

```asm

/*
 * Setup_paging
 *
 * This routine sets up paging by setting the page bit
 * in cr0. The page tables are set up, identity-mapping
 * the first 8MB. The pager assumes that no illegal
 * addresses are produced (ie >4Mb on a 4Mb machine).
 *
 * NOTE! Although all physical memory should be identity
 * mapped by this routine, only the kernel page functions
 * use the >1Mb addresses directly. All "normal" functions
 * use just the lower 1Mb, or the local data space, which
 * will be mapped to some other place - mm keeps track of
 * that.
 *
 * For those with more memory than 8 Mb - tough luck. I've
 * not got it, why should you :-) The source is here. Change
 * it. (Seriously - it shouldn't be too difficult. Mostly
 * change some constants etc. I left it at 8Mb, as my machine
 * even cannot be extended past that (ok, but it was cheap :-)
 * I've tried to show which constants to change by having
 * some kind of marker at them (search for "8Mb"), but I
 * won't guarantee that's all :-( )
 */
.align 2
setup_paging:
	movl $1024*3,%ecx
	xorl %eax,%eax
	xorl %edi,%edi			/* pg_dir is at 0x000 */
	cld;rep;stosl
	movl $pg0+7,_pg_dir		/* set present bit/user r/w */
	movl $pg1+7,_pg_dir+4		/*  --------- " " --------- */
	movl $pg1+4092,%edi
	movl $0x7ff007,%eax		/*  8Mb - 4096 + 7 (r/w user,p) */
	std
1:	stosl			/* fill pages backwards - more efficient :-) */
	subl $0x1000,%eax
	jge 1b
	xorl %eax,%eax		/* pg_dir is at 0x0000 */
	movl %eax,%cr3		/* cr3 - page directory start */
	movl %cr0,%eax
	orl $0x80000000,%eax
	movl %eax,%cr0		/* set paging (PG) bit */
	ret			/* this also flushes prefetch-queue */
```


1. page-directory entry & page-table entry

![pageentry](imgs/pageentry.png)


2. the page directory runs from 0x0 to 0x1000 (4K); one page-directory entry takes 4 bytes, so there are 0x1000 / 4 = 1024 entries, i.e. up to 1024 page tables

3. 

```
.org 0x1000
pg0:

.org 0x2000
pg1:
```

These are the page tables pointed to by the first 2 entries of the page directory; they start at 0x1000 (pg0) and 0x2000 (pg1) respectively, and each has 1024 page table entries. So the total mapped memory is $2\times 1024 \times 4K = 8M$

## mm/memory.c

```c
#define invalidate() \
__asm__("movl %%eax,%%cr3"::"a" (0))

#if (BUFFER_END < 0x100000)
#define LOW_MEM 0x100000
#else
#define LOW_MEM BUFFER_END
#endif

/* these are not to be changed - thay are calculated from the above */
#define PAGING_MEMORY (HIGH_MEMORY - LOW_MEM)
#define PAGING_PAGES (PAGING_MEMORY/4096)
#define MAP_NR(addr) (((addr)-LOW_MEM)>>12)

#if (PAGING_PAGES < 10)
#error "Won't work"
#endif

#define copy_page(from,to) \
__asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024):"cx","di","si")

static unsigned short mem_map [ PAGING_PAGES ] = {0,};
```

* the lowest 1MB (below 0x100000) is considered a "special" area used by the kernel

* PAGING_PAGES is the number of *physical* pages available for paging, i.e. the pages between LOW_MEM and HIGH_MEMORY. mem_map[i] holds n, the number of users sharing page i; if n == 0, that page is free for use.

-------------

```c

static unsigned short mem_map [ PAGING_PAGES ] = {0,};

/*
 * Get physical address of first (actually last :-) free page, and mark it
 * used. If no free pages left, return 0.
 */
unsigned long get_free_page(void)
{
register unsigned long __res asm("ax");

__asm__("std ; repne ; scasw\n\t"
	"jne 1f\n\t"
	"movw $1,2(%%edi)\n\t"
	"sall $12,%%ecx\n\t"
	"movl %%ecx,%%edx\n\t"
	"addl %2,%%edx\n\t"
	"movl $1024,%%ecx\n\t"
	"leal 4092(%%edx),%%edi\n\t"
	"rep ; stosl\n\t"
	"movl %%edx,%%eax\n"
	"1:"
	:"=a" (__res)
	:"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES),
	"D" (mem_map+PAGING_PAGES-1)
	:"di","cx","dx");
return __res;
}
```

* std: Sets the direction flag to 1, causing all subsequent string operations to decrement the index registers, (E)SI and/or (E)DI, used during the operation.

* repne: REPNE/REPNZ causes the succeeding string instruction to be repeated as long as the compared bytes or words are not equal (ZF = 0) and CX is not yet counted down to zero.

* scasw: The SCAS instruction is used for searching a particular character or set of characters in a string. The data item to be searched should be in AL (for SCASB), AX (for SCASW) or EAX (for SCASD) registers. The string to be searched should be in memory and pointed by the ES:DI (or EDI) register. 


* `"D" (mem_map+PAGING_PAGES-1)` sets EDI to the address of the last mem_map entry, so the scan starts from the last page and goes backwards.

* `:"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES),` the constraint "0" for input operand says that it must occupy the same location as output operand 0, so this set the `ax` to 0, which is the search target of `scasw`

* `"movw $1,2(%%edi)\n\t"`: `2(%%edi)` is the address `%%edi + 2`. This line sets the use count of the mem_map entry that was just found to 1. `scasw` has already decremented %%edi past that entry, so we add 2 = sizeof(unsigned short) to get back to it.


```asm
	"sall $12,%%ecx\n\t"
	"movl %%ecx,%%edx\n\t"
	"addl %2,%%edx\n\t"
```
* `sal/shl` are two mnemonics for the same instruction. This instruction shifts each bit in the specified destination to the left and 0 is stored at LSB position.

* calculate the actual physical address for the free page. Page number is present in `ecx`. so the physical address is `4k * ecx + LOW_MEM`



```asm
	"leal 4092(%%edx),%%edi\n\t"
	"rep ; stosl\n\t"
	"movl %%edx,%%eax\n"
```
* leal puts the address `%%edx + 4092` into %%edi, whereas movl would load the value stored at 4092(%%edx) into the destination.

* stosl: stores EAX to ES:EDI. Each store is 4 bytes and the direction flag is still set (std), so it starts at offset 4092 = (4K - 4) and works backwards.

* So this code fills the entire 4K page with zeros and returns its address in %%eax

--------------------------------------

```c
/*
 * This function frees a continuos block of page tables, as needed
 * by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks.
 */
int free_page_tables(unsigned long from,unsigned long size)
{
	unsigned long *pg_table;
	unsigned long * dir, nr;

	if (from & 0x3fffff)
		panic("free_page_tables called with wrong alignment");
	if (!from)
		panic("Trying to free up swapper memory space");

    /* xitongsys:
        calculate the number of 4MB blocks to be freed. (1<<22 = 4MB),(0x3fffff = 4MB - 1)
    */
	size = (size + 0x3fffff) >> 22;


    /* xitongsys
    from is the virtual address. 
	This line calculate the physical address of the page dir from 
	the virtual address. 
	The page dir base address is 0x0 and 
	one entry size is 4 bytes and total entries number is 1024.
    So the total memory size of the page dirs is (1<<12 = 4KB)

    The top 10 bits of a virtual address is the index of the page dir
	page_dir_index = (vaddr >> 22)

    every entry has a size of 4Bytes, so the physical address is 

    phy_addr_page_dir 
	= page_dir_index * 4 
	= (vaddr >> 22) << 2 = (vaddr >> 20) & 0xffc
    */
	dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */



	for ( ; size-->0 ; dir++) {
		if (!(1 & *dir))
			continue;
		pg_table = (unsigned long *) (0xfffff000 & *dir);
		for (nr=0 ; nr<1024 ; nr++) {
			if (1 & *pg_table)
				free_page(0xfffff000 & *pg_table);
			*pg_table = 0;
			pg_table++;
		}
		free_page(0xfffff000 & *dir);
		*dir = 0;
	}
	invalidate();
	return 0;
}
```
-------------------

```c
/*
 *  Well, here is one of the most complicated functions in mm. It
 * copies a range of linerar addresses by copying only the pages.
 * Let's hope this is bug-free, 'cause this one I don't want to debug :-)
 *
 * Note! We don't copy just any chunks of memory - addresses have to
 * be divisible by 4Mb (one page-directory entry), as this makes the
 * function easier. It's used only by fork anyway.
 *
 * NOTE 2!! When from==0 we are copying kernel space for the first
 * fork(). Then we DONT want to copy a full page-directory entry, as
 * that would lead to some serious memory waste - we just copy the
 * first 160 pages - 640kB. Even that is more than we need, but it
 * doesn't take any more memory - we don't copy-on-write in the low
 * 1 Mb-range, so the pages can be shared with the kernel. Thus the
 * special case for nr=xxxx.
 */
int copy_page_tables(unsigned long from,unsigned long to,long size)
{
	unsigned long * from_page_table;
	unsigned long * to_page_table;
	unsigned long this_page;
	unsigned long * from_dir, * to_dir;
	unsigned long nr;

	if ((from&0x3fffff) || (to&0x3fffff))
		panic("copy_page_tables called with wrong alignment");
	from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
	to_dir = (unsigned long *) ((to>>20) & 0xffc);
	size = ((unsigned) (size+0x3fffff)) >> 22;
	for( ; size-->0 ; from_dir++,to_dir++) {
		if (1 & *to_dir)
			panic("copy_page_tables: already exist");
		if (!(1 & *from_dir))
			continue;
		from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
		if (!(to_page_table = (unsigned long *) get_free_page()))
			return -1;	/* Out of memory, see freeing */
		*to_dir = ((unsigned long) to_page_table) | 7;
		nr = (from==0)?0xA0:1024;
		for ( ; nr-- > 0 ; from_page_table++,to_page_table++) {
			this_page = *from_page_table;
			if (!(1 & this_page))
				continue;
			this_page &= ~2;
			*to_page_table = this_page;
			if (this_page > LOW_MEM) {
				*from_page_table = this_page;
				this_page -= LOW_MEM;
				this_page >>= 12;
				mem_map[this_page]++;
			}
		}
	}
	invalidate();
	return 0;
}
```

* `nr = (from==0)?0xA0:1024;` is this task0 forking (from == 0)? Then copy only 0xA0 = 160 pages = 640KB

* `this_page &= ~2;` mark the destination as read only

* 

```c
	this_page &= ~2;
	*to_page_table = this_page;
	if (this_page > LOW_MEM) {
		*from_page_table = this_page;
		this_page -= LOW_MEM;
		this_page >>= 12;
		mem_map[this_page]++;
	}
```

If the source page is not a kernel page (`this_page > LOW_MEM`), also mark the source page as "read only". We should not set the kernel pages as read only - that would cause a page fault from the kernel, and page faults in the kernel are not allowed.


-------------------

```c
void un_wp_page(unsigned long * table_entry)
{
	unsigned long old_page,new_page;

	old_page = 0xfffff000 & *table_entry;
	if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
		*table_entry |= 2;
		return;
	}
	if (!(new_page=get_free_page()))
		do_exit(SIGSEGV);
	if (old_page >= LOW_MEM)
		mem_map[MAP_NR(old_page)]--;
	*table_entry = new_page | 7;
	copy_page(old_page,new_page);
}	

/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
	un_wp_page((unsigned long *)
		(((address>>10) & 0xffc) + (0xfffff000 &
		*((unsigned long *) ((address>>20) &0xffc)))));

}
```

* COW (copy-on-write) functions. During fork(), the source and destination share the pages and both are marked read-only. Now suppose the parent tries to write to a shared page: it gets a page fault and ends up here. We get a new page for the parent, copy the contents of the old page into the new page, set the new page as writable and decrement the reference count of the old page. But the child still has the old page mapped read-only, so when the child tries to write to it, it also gets a page fault and comes here. In that case the code sees that the page is referred to by only one process (`mem_map[...] == 1`) yet is still marked read-only, so it simply marks the page writable.

---------------

## fs/exec.c

```c
/*
 * MAX_ARG_PAGES defines the number of pages allocated for arguments
 * and envelope for the new program. 32 should suffice, this gives
 * a maximum env+arg of 128kB !
 */
#define MAX_ARG_PAGES 32
```

*  We reserve 32 pages for argv, envp and the tables needed to represent them. First of all, let us explain where argv and envp come from. It is very simple - when we call the execve function, we pass the argv and envp pointers as arguments to the function! As simple as that. So what happens when we type a command and arguments at the shell prompt? Well, that is also simple - the shell takes those strings and puts
them into a two dimensional array and passes it as arguments to the execve call. So what are the implications of this? It means that the argv and envp addresses the kernel gets are in user space (data segment) - so whenever the kernel needs to access that data, it has to go through segment selector 0x17 (the user data segment in the LDT). In C code, the kernel has to use the `get_fs_*` / `put_fs_*` helpers (e.g. `get_fs_byte`, `put_fs_long`).

---------------

```c
#define cp_block(from,to) \
__asm__("pushl $0x10\n\t" \
	"pushl $0x17\n\t" \
	"pop %%es\n\t" \
	"cld\n\t" \
	"rep\n\t" \
	"movsl\n\t" \
	"pop %%es" \
	::"c" (BLOCK_SIZE/4),"S" (from),"D" (to) \
	:"cx","di","si")
```

* The above macro does a “fast” copy by utilizing the x86 repeat prefix. In short, `rep movsl` copies data from ds:si to es:di. So the es used should denote the user segment. That is why we initially push 0x17 and pop it into es. At the end of the macro, we restore es to 0x10 (kernel segment). The above macro copies data from kernel space to user space. The first argument is a kernel offset and the
second one is a user space offset.

----------------

![exec01.png](imgs/exec01.png)
![exec02.png](imgs/exec02.png)

```c
/*
 * create_tables() parses the env- and arg-strings in new user
 * memory and creates the pointer tables from them, and puts their
 * addresses on the "stack", returning the new stack pointer value.
 */
static unsigned long * create_tables(char * p,int argc,int envc)
{
	unsigned long *argv,*envp;
	unsigned long * sp;

	sp = (unsigned long *) (0xfffffffc & (unsigned long) p);
	sp -= envc+1;
	envp = sp;
	sp -= argc+1;
	argv = sp;
	put_fs_long((unsigned long)envp,--sp);
	put_fs_long((unsigned long)argv,--sp);
	put_fs_long((unsigned long)argc,--sp);
	while (argc-->0) {
		put_fs_long((unsigned long) p,argv++);
		while (get_fs_byte(p++)) /* nothing */ ;
	}
	put_fs_long(0,argv);
	while (envc-->0) {
		put_fs_long((unsigned long) p,envp++);
		while (get_fs_byte(p++)) /* nothing */ ;
	}
	put_fs_long(0,envp);
	return sp;
}

```

* `sp -= envc + 1`: the list ends with a `NULL` entry, which is why 1 is added here.

* `argv` is a user space address and so we use `put_fs_long`

*

```c
	put_fs_long((unsigned long)envp,--sp);
	put_fs_long((unsigned long)argv,--sp);
	put_fs_long((unsigned long)argc,--sp);
```

push envp, argv, argc on stack

-------------

```c
/*
 * 'copy_string()' copies argument/envelope strings from user
 * memory to free pages in kernel mem. These are in a format ready
 * to be put directly into the top of new user memory.
 */
static unsigned long copy_strings(int argc,char ** argv,unsigned long *page,
		unsigned long p)
{
	int len,i;
	char *tmp;

	while (argc-- > 0) {
		if (!(tmp = (char *)get_fs_long(((unsigned long *) argv)+argc)))
			panic("argc is wrong");
		len=0;		/* remember zero-padding */
		do {
			len++;
		} while (get_fs_byte(tmp++));
		if (p-len < 0)		/* this shouldn't happen - 128kB */
			return 0;
		i = ((unsigned) (p-len)) >> 12;
		while (i<MAX_ARG_PAGES && !page[i]) {
			if (!(page[i]=get_free_page()))
				return 0;
			i++;
		}
		do {
			--p;
			if (!page[p/PAGE_SIZE])
				panic("nonexistent page in exec.c");
			((char *) page[p/PAGE_SIZE])[p%PAGE_SIZE] =
				get_fs_byte(--tmp);
		} while (--len);
	}
	return p;
}
```

![exec03.png](imgs/exec03.png)

* this function copies the args/envs strings from user memory to free pages in kernel memory, copying from high memory to low memory.

* the memory pic like

high memory: [(MAX_ARG_PAGES - 4 ) * PAGE_SIZE][ 4 * PAGE_SIZE ]

So the p means the not used bytes in the first segment

|xxx <----- p bytes not used ---> |used for table|


* 

```c
	len=0;		/* remember zero-padding */
	do {
		len++;
	} while (get_fs_byte(tmp++));
	if (p-len < 0)		/* this shouldn't happen - 128kB */
		return 0;
```

get the length of this argument string, including its terminating NUL byte. If the length is larger than the space left, return 0 (no panic here).

* 

```c
	/* xitongsys
		convert the address to page index
	*/
	i = ((unsigned) (p-len)) >> 12;
	while (i<MAX_ARG_PAGES && !page[i]) {
		if (!(page[i]=get_free_page()))
			return 0;
		i++;
	}
```

* 
```c
	do {
		--p;
		if (!page[p/PAGE_SIZE])
			panic("nonexistent page in exec.c");
		((char *) page[p/PAGE_SIZE])[p%PAGE_SIZE] =
			get_fs_byte(--tmp);
	} while (--len);
```

copy the string to the free page. page is an `unsigned long *`;
`((char*) page[p/PAGE_SIZE])` is the page address and `p % PAGE_SIZE` is the index of the character within it. NICE CODE !

-------------

```c
static unsigned long change_ldt(unsigned long text_size,unsigned long * page)
{
	unsigned long code_limit,data_limit,code_base,data_base;
	int i;

	code_limit = text_size+PAGE_SIZE -1;
	code_limit &= 0xFFFFF000;
	data_limit = 0x4000000;
	code_base = get_base(current->ldt[1]);
	data_base = code_base;
	set_base(current->ldt[1],code_base);
	set_limit(current->ldt[1],code_limit);
	set_base(current->ldt[2],data_base);
	set_limit(current->ldt[2],data_limit);
/* make sure fs points to the NEW data segment */
	__asm__("pushl $0x17\n\tpop %%fs"::);
	data_base += data_limit;
	for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) {
		data_base -= PAGE_SIZE;
		if (page[i])
			put_page(page[i],data_base);
	}
	return data_limit;
}
```

*
```c
	code_limit = text_size+PAGE_SIZE -1;
	code_limit &= 0xFFFFF000;
	data_limit = 0x4000000;
```
code_limit align to PAGE_SIZE(4K). data_limit = 64MB

*
```c
	code_base = get_base(current->ldt[1]);
	data_base = code_base;
	set_base(current->ldt[1],code_base);
	set_limit(current->ldt[1],code_limit);
	set_base(current->ldt[2],data_base);
	set_limit(current->ldt[2],data_limit);
```

set base and limit in ldt

*
```c
	data_base += data_limit;
	for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) {
		data_base -= PAGE_SIZE;
		if (page[i])
			put_page(page[i],data_base);
	}
```

data_base + data_limit is the highest address

![exec04.png](imgs/exec04.png)

---------------

```c
```