Intel manuals:
AMD manuals:
opcode — first part of instruction encoding
instruction — Intel assembly skeleton
r/m32 = 32-bit memory or register value
64-bit mode — does instruction exist in 64-bit mode?
compat/leg mode — in 16-bit/32-bit modes?
description + operation (later on page)
flags affected (used by jne, etc.)
exceptions — how can the OS be called from this?
diagram from Immae via Wikipedia
movq $0x123456789abcdef, %rax
// Intel: MOVABS RAX, 0x123456789abcdef
xor %eax, %eax
// writing the 32-bit register %eax also zeroes the upper 32 bits of %rax:
// %rax is 0, not 0x0123456700000000
movl $-1, %ebx
// Intel: MOV EBX, -1
// %rbx is 0xFFFFFFFF, not -1 (0xFFFFF...FFF)
mov $1234, %ecx to mov $1234, %rcx
movq $0x123456789abcdef, %rax
movw $0xaaaa, %ax
// %rax is 0x123456789abaaaa
movq $42, 100(%rbx,%rcx,4)
mov QWORD PTR [rbx+rcx*4+100], 42
memory[rbx + rcx * 4 + 100] <- 42
movq $42, 100(%rbx,%rcx,4)
$ = constant; % = register
movq $42, 100(%rbx,%rcx,4)
q = 8 bytes; l = 4; w = 2; b = 1
100(%rbx,%rcx,4): memory[100 + rbx + rcx * 4]
sub %rax, %rbx: rbx ← rbx - rax
mov QWORD PTR [RBX + RCX * 4 + 100], 42
destination first
[…] indicates location in memory
QWORD PTR […] for 8 bytes in memory
LEA = Load Effective Address
uses the syntax of a memory access, but…
just computes the address and uses it:
leaq 4(%rax), %rax same as addq $4, %rax
leaq (%rax,%rax,4), %rax multiplies %rax by 5
address-of(memory[rax + rax * 4])
leal (%rbx,%rcx), %eax adds rbx + rcx into eax
.data
string:
.asciz "abcdefgh" // 9 bytes: 8 characters plus the terminating NUL
.text
movq $string, %rax // mov RAX, STRING -- rax = address of string
movq string, %rdx // mov RDX, [STRING] -- rdx = the 8 bytes "abcdefgh"
movb (%rax), %bl // mov BL, [RAX] -- bl = 'a'
leal 1(%rbx), %ebx // lea EBX, [RBX+1] -- bl = 'b'
movb %bl, (%rax) // mov [RAX], BL -- string[0] = 'b'
// 4-byte store: an 8-byte movq here would write string[4..11], clobbering
// the NUL terminator and 3 bytes past the end of the 9-byte string
movl %edx, 4(%rax) // mov [4+RAX], EDX -- string[4..7] = "abcd"
What is the final value of string?
"abcdabcd"
"bbcdefgh"
"bbcdabcd"
"abcdefgh"
objdump
objdump --disassemble:
0000000000001060 <main>:
1060: f3 0f 1e fa endbr64
1064: 50 push %rax
1065: 48 8d 3d 98 0f 00 00 lea 0xf98(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
106c: e8 df ff ff ff callq 1050 <puts@plt>
1071: 31 c0 xor %eax,%eax
1073: 5a pop %rdx
1074: c3 retq
symbol main at address 0x1060
first column: instruction addresses in hexadecimal
(if executable/library has fixed address,
these are the addresses they’ll be loaded in memory)
after instruction addresses:
machine code as list of byte values in hexadecimal
callq 1050 <puts@plt> = call to address 0x1050
puts@plt is the label of that address
comment after lea annotates instruction computed:
0xf98(%rip)=0x2004 (0x4 bytes after the label _IO_stdin_used)
%xmm0 through %xmm15 (%xmm7 on 32-bit)
each holds 128 bits —
four 32-bit floats (addps, etc.)
two 64-bit floats (addpd, etc.)
packed integers (paddq/d/w/b, etc.)
one 32-bit float (addss, movss, etc.)
one 64-bit float (addsd, movsd, etc.)
more recently: %ymm0 through %ymm15 (256-bit, ‘‘AVX’’)
%xmm X registers
multiplyEachElementOfArray:
/* In: %rsi = array of 32-bit floats, %rdi = length (element count),
   %xmm0 = multiplier.
   Scales every element in place; %xmm1 is used as scratch. */
test %rdi, %rdi // zero-length array: nothing to scale
je .Ldone
.Lnext:
movss (%rsi), %xmm1 // xmm1 = current element
mulss %xmm0, %xmm1 // xmm1 *= multiplier
movss %xmm1, (%rsi) // write the product back
addq $4, %rsi // advance to the next 4-byte element
subq $1, %rdi // one fewer element remaining
jne .Lnext // keep going until the count reaches zero
.Ldone: ret
0x1234(%rip) / Intel [RIP + 0x1234]
thing: .quad 42
movq thing(%rip), %rax
movq -0x2000(%rip), %rax (not movq 0x5000…)
memcpy: // copy %rdx bytes from (%rsi) to (%rdi)
testq %rdx, %rdx // any bytes left? (comparing %rdx with itself is always "equal", which would skip the copy entirely)
je done
movsb // copy one byte from (%rsi) to (%rdi) and step both pointers
subq $1, %rdx // one fewer byte remaining
jmp memcpy
done: ret
movsb (move data from string to string, byte)
copies one byte from (%rsi) to (%rdi)
memcpy: // copy %rdx bytes from (%rsi) to (%rdi)
movq %rdx, %rcx // rep takes its repeat count from %rcx, not %rdx
rep movsb // repeat movsb, decrementing %rcx, until %rcx is 0
ret
rep prefix byte
repeat instruction until %rcx is 0
decrement %rcx each time
cannot use rep with all instructions
lodsb, stosb — load/store into string
movsw, movsd — word/dword versions
string comparison instructions
rep movsb is still recommended on modern Intel
0000000000001060 <main>:
1060: f3 0f 1e fa endbr64
1064: 50 push %rax
1065: 48 8d 3d 98 0f 00 00 lea 0xf98(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
106c: e8 df ff ff ff callq 1050 <puts@plt>
1071: 31 c0 xor %eax,%eax
1073: 5a pop %rdx
1074: c3 retq
addresses you’ve seen are the offsets
but every access uses a segment number!
segment numbers come from registers
default segment register based on instruction type
instructions can have a segment override:
movq $42, %fs:100(%rsi) = move 42 to {segment # in FS:offset 100 + RSI}
Figure from Intel manuals, Vol 3A
Figure from Intel manuals, Volume 3A
%fs, %gs
0000000000001149 <get_thread_local>:
1149: f3 0f 1e fa
endbr64
114d: 64 8b 04 25 fc ff ff ff
mov %fs:0xfffffffffffffffc,%eax
1155: c3
retq
TLS off 0x0000002df0 vaddr 0x0000003df0 paddr 0x0000003df0 align 2**2
filesz 0x0000000000 memsz 0x0000000004 flags r--
0000000000001156 <set_thread_local>:
1156: f3 0f 1e fa
endbr64
115a: 64 89 3c 25 fc ff ff ff
mov %edi,%fs:0xfffffffffffffffc
1162: c3
retq
%rdi, %rsi, %rdx, %rcx, %r8, %r9
%xmm0, %xmm1, etc.
call, ret instructions assume this
%rax
foo(a,b,c,d,e,f,g,h);
%xmm registers in order
variable number of arguments
printf, scanf, …
man stdarg
%al (low byte of %rax) contains number of %xmm used
numbers: .float 1, 2, 3, 4
ones: .float 1, 3, 5, 7
result: .float 0, 0, 0, 0
...
// movps faults on a memory operand that is not 16-byte aligned;
// numbers, ones and result are assumed to be 16-byte aligned (TODO confirm)
movps numbers, %xmm0 // xmm0 = the four floats at numbers
movps ones, %xmm1 // xmm1 = {1, 3, 5, 7}
addps %xmm1, %xmm0 // four float additions in one instruction
movps %xmm0, result // store all four sums
/* result contains: 1+1=2,2+3=5,3+5=8,4+7=11 */
{.absolute top=“0%” left=“0%” width=1050 height=600 .my-center .fragment .fade-in-then-out fragment-index1}
{.absolute top=“0%” left=“0%” width=1050 height=600 .my-center .fragment .fade-in-then-out fragment-index2}
{.absolute top=“0%” left=“0%” width=1050 height=600 .my-center .fragment .fade-in-then-out fragment-index3}
{.absolute top=“0%” left=“0%” width=1050 height=600 .my-center .fragment .fade-in-then-out fragment-index4}
x87: 8 floating point registers
%st(0) through %st(7)
arranged as a stack of registers
example: fld 0(%rbx)
| register | before | after |
|----------|--------|-------|
| %st(0) | 5.0 | (value from memory at 0(%rbx)) |
| %st(1) | 6.0 | 5.0 |
| %st(2) | 7.0 | 6.0 |
| … | … | … |
| %st(6) | 10.0 | 9.0 |
| %st(7) | 11.0 | 10.0 |
compiling little C programs and looking at the assembly is nice:
gcc -S
.cfi directives (for try/catch)
or disassemble:
gcc -c file.c (or make an executable)
objdump -dr file.o (or on an executable)
int sum(int x, int y) { return x + y; }
sum:
pushq %rbp // save the caller's frame pointer
movq %rsp, %rbp // %rbp = base of this function's frame
movl %edi, -4(%rbp) // spill x (first argument) to the stack
movl %esi, -8(%rbp) // spill y (second argument) to the stack
movl -4(%rbp), %edx // reload x
movl -8(%rbp), %eax // reload y
addl %edx, %eax // %eax = x + y, the return value
popq %rbp // restore the caller's frame pointer
ret
gcc -O version:
sum:
leal (%rdi,%rsi), %eax // %eax = x + y: lea computes the sum as if it were an address
ret
functions may freely trash these
return value register %rax
argument registers:
%rdi, %rsi, %rdx, %rcx, %r8, %r9
%r10, %r11
MMX/SSE/AVX registers: %xmm0-15, etc.
floating point stack: %st(0)-%st(7)
condition codes (used by jne, etc.)
functions must preserve these (callee-saved):
%rbx
%rsp (stack pointer), %rbp (frame pointer, maybe)
%r12-%r15
foo:
pushq %r12 // r12 is callee-saved: foo must restore it before returning
... use r12 ...
popq %r12
ret
...
other_function:
pushq %r11 // r11 is caller-saved: foo may overwrite it, so save it across the call
...
callq foo
popq %r11 // restore our value of r11 after foo returns
AT&T %reg
Intel REG
AT&T $constant
Intel constant
AT&T displacement(%base, %index, scale)
Intel [base+index*scale+displacement]
displacement (absolute)
displacement(%base)
displacement(,%index, scale)
AT&T jmp *%rax
Intel jmp RAX
AT&T jmp *(%rax)
Intel jmp [RAX]
AT&T jmp *(%rax,%rbx,8)
Intel jmp [RAX+RBX*8]
0xA0000: lea 0x1234(%rip), %rax # 0xA123b
(Intel syntax: LEA RAX, [RIP + 0x1234])
0xA0007: add %rbx, %rax # (Intel syntax: ADD RAX, RBX)
0xA000A: jmp *(%rax) # (Intel syntax: JMP [RAX])
...
0xA123B: 0xB0000 (64-bit value)
0xA1243: 0xC0000
...
0xB0000: 0xD0000
0xB0008: 0xE0000
0xB0010: 0xF0000
...
0xC0000: 0x90000
If %rbx initially contains 0x8, then the instruction executed after the jump is at address \rule{1cm}{1pt}.