assume: addresses don’t leak
choose random addresses each time
enough possibilities that attacker won’t ‘‘get lucky’’
should prevent exploits — can’t write GOT/shellcode location
...
EXEC_P, D_PAGED
...
LOAD off 0x0000000 vaddr 0x400000 paddr 0x0400000 align 2**12
filesz 0x00006c8 memsz 0x0006c8 flags r--
LOAD off 0x0001000 vaddr 0x401000 paddr 0x0401000 align 2**12
filesz 0x01a7865 memsz 0x1a7865 flags r-x
some executables had LOADs at fixed addresses
can’t randomize program addresses
others did not (marked DYNAMIC)
...
HAS_SYMS, DYNAMIC, D_PAGED
...
LOAD off 0x000000 vaddr 0x000000 paddr 0x000000 align 2**12
filesz 0x0036f8 memsz 0x0036f8 flags r--
LOAD off 0x004000 vaddr 0x004000 paddr 0x004000 align 2**12
...
random number between 0 and 0x3F FFFF; stack placed at 0x7FFF FFFF FFFF - (random number \(\times\) 0x1000)
0x1000 because OS has to allocate whole pages (0x1000 bytes each)
suppose we have a 32-bit Linux server vulnerable to stack smashing
… but stack address randomized with 256 possible starting locations
server is automatically restarted after unsuccessful attack
suppose stack layout is 8KB buffer + return address + 12KB other stuff
what should attacker do to maximize chance of success?
about how many tries needed for successful attack?
Which initial value for p (‘‘left over’’ from prior use of register, etc.) would be most useful for a later buffer overflow attack?
(a) p is an invalid pointer and accessing it will crash the program
(b) p points to a global variable
(c) p points to space on the stack that is currently unallocated, but last contained an input buffer
(d) p points to space on the stack that currently holds a return address
(e) p points to space on the stack that is currently unallocated, but last contained a pointer to the last used byte of an array on the stack

$ objdump -x foo.exe
...
LOAD off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**12
filesz 0x0000000000000620 memsz 0x0000000000000620 flags r--
LOAD off 0x0000000000001000 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**12
filesz 0x0000000000000205 memsz 0x0000000000000205 flags r-x
LOAD off 0x0000000000002000 vaddr 0x0000000000002000 paddr 0x0000000000002000 align 2**12
filesz 0x0000000000000150 memsz 0x0000000000000150 flags r--
LOAD off 0x0000000000002db8 vaddr 0x0000000000003db8 paddr 0x0000000000003db8 align 2**12
filesz 0x000000000000025c memsz 0x0000000000000260 flags rw-
0000000000001050 <__printf_chk@plt>:
1050: f3 0f 1e fa endbr64
1054: f2 ff 25 75 2f 00 00 bnd jmpq *0x2f75(%rip) # 3fd0 <__printf_chk@GLIBC_2.3.4>
105b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
0000000000001060 <main>:
1060: f3 0f 1e fa endbr64
1064: 50 push %rax
1065: 8b 15 a5 2f 00 00 mov 0x2fa5(%rip),%edx # 4010 <global>
106b: 48 8d 35 92 0f 00 00 lea 0xf92(%rip),%rsi # 2004 <_IO_stdin_used+0x4>
1072: 31 c0 xor %eax,%eax
1074: bf 01 00 00 00 mov $0x1,%edi
1079: e8 d2 ff ff ff callq 1050 <__printf_chk@plt>
Part 1: What address is most likely leaked by the above?
class Foo { virtual const char *bar() { ... } };
...
Foo *f = new Foo;
char *p = new char[1024];
printf("%s\n", f);
if leaked value was 0x822003 and in a debugger (with different randomization):
which of the above can I compute based on the leak?
VTable pointer part of same object/library containing class Foo definition
so can use its location to find code/data from same executable
can’t use it to find things on heap, stack, in C library
printf("buffer = %p", buffer)
—
buffer = 0x646d06d15040
—
$ objdump -tR a.out
...
0000000000004040 g O .bss 0000000000000400 buffer
...
0000000000003fb0 R_X86_64_JUMP_SLOT strlen@GLIBC_2.2.5
$ objdump -d a.out
...
0000000000001090 <strlen@plt>:
1090: f3 0f 1e fa endbr64
1094: ff 25 16 2f 00 00 jmp *0x2f16(%rip) # 3fb0 <strlen@GLIBC_2.2.5>
109a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
...
buffer address = 0x646d06d15040 - offset = 0x4040
offset = 0x646d06d11000
GOT entry address = 0x3fb0 + offset = 0x646d06d14fb0
ASLR seems like no-brainer
big problem: performance/code size impacts
(smaller problem: inconsistent behavior when bugs)
position-independent code = code that can be loaded anywhere
necessary prerequisite for most of ASLR
Unix did this for libraries for non-security reasons
but not other programs, probably because of overheads
Windows will edit code to relocate
typically one fixed location per program/library per boot
fixup once per program/library per boot
Windows + Visual Studio had ‘full’ ASLR by default since 2010
foo:                            # demo switch: for %rdi in 0..5 return 1 or 2 via jump table, else 3
movl $3, %eax                   # preload default return value (used if we fall into defaultCase)
cmpq $5, %rdi                   # unsigned bounds check on the table index
ja defaultCase                  # index > 5 -> return 3 (%eax already set)
jmp *lookupTable(,%rdi,8)       # indirect jump through 8-byte-entry table of ABSOLUTE addresses
                                # (needs relocation if loaded at a different address -- not PIC)
returnOne:
movl $1, %eax                   # case result: 1
ret
returnTwo:
movl $2, %eax                   # case result: 2 (shares the ret below with defaultCase)
defaultCase:
ret
lookupTable:                    # six absolute code addresses, one per index 0..5
.quad returnOne                 # index 0
.quad returnTwo                 # index 1
.quad returnOne                 # index 2
.quad returnTwo                 # index 3
.quad returnOne                 # index 4
.quad returnOne                 # index 5
00000000000007ab <foo>:
b8 03 00 00 00 mov $0x3,%eax
48 83 ff 05 cmp $0x5,%rdi
77 1b ja 7d0 <foo+0x25>
48 8d 05 ab 00 00 00 lea 0xab(%rip), %rax # 868
48 63 14 b8 movslq (%rax,%rdi,4), %rdx
48 01 d0 add %rdx,%rax
ff e0 jmpq *%rax
b8 02 00 00 00 mov $0x2,%eax
c3 retq
b8 01 00 00 00 mov $0x1,%eax
c3 retq
...
@ 868: -156 /* offset */
@ 870: -162
...
jmp *jumpTable(,%rdi,8)
lea (get table address — with relative offset)
movslq (do table lookup of offset)
add (add to base)
jmp (to computed base)
mov, lea, …
// BEFORE: (fixed addresses)
08048310 <__printf_chk@plt>:
8048310: ff 25 10 a0 04 08 jmp *0x804a010
/* 0x804a010 == global offset table entry */
// AFTER: (position-independent)
00000490 <__printf_chk@plt>:
490: ff a3 10 00 00 00 jmp *0x10(%ebx)
/* %ebx --- address of global offset table */
/* needs to be set by caller */
// BEFORE: (fixed addresses)
8049061: 68 08 a0 04 08 push $0x804a008
8049066: e8 d5 ff ff ff call 8049040 <puts@plt>
// AFTER: (position-independent)
000010d0 <__x86.get_pc_thunk.bx>:
10d0: 8b 1c 24 mov (%esp),%ebx
10d3: c3 ret
...
106e: e8 5d 00 00 00 call 10d0 <__x86.get_pc_thunk.bx>
1073: 81 c3 65 2f 00 00 add $0x2f65,%ebx
...
107d: 8d 83 30 e0 ff ff lea -0x1fd0(%ebx),%eax
1083: 50 push %eax
1084: e8 b7 ff ff ff call 1040 <puts@plt>
// Minimal class with a virtual method: every Foo object starts with a
// hidden vtable pointer, which is what the relocation dump below refers to.
struct Foo {
    // Returns a static string; virtual so a vtable is generated.
    virtual const char *bar() { return "Foo::bar"; }
};
// Heap-allocates a Foo and makes one virtual call (dispatch goes through
// the object's vtable pointer). The allocation is deliberately not freed --
// slide brevity; the process exits immediately afterward.
int main() {
    Foo *f = new Foo;
    f->bar();
}
$ objdump -R example2
example2: file format elf64-x86-64
DYNAMIC RELOCATION RECORDS
OFFSET TYPE VALUE
0000000000003da8 R_X86_64_RELATIVE *ABS*+0x0000000000001160
0000000000003db0 R_X86_64_RELATIVE *ABS*+0x0000000000001120
0000000000004008 R_X86_64_RELATIVE *ABS*+0x0000000000004008
0000000000003fd8 R_X86_64_GLOB_DAT __cxa_finalize@GLIBC_2.2.5
0000000000003fe0 R_X86_64_GLOB_DAT _ITM_deregisterTMCloneTable
0000000000003fe8 R_X86_64_GLOB_DAT __libc_start_main@GLIBC_2.2.5
0000000000003ff0 R_X86_64_GLOB_DAT __gmon_start__
0000000000003ff8 R_X86_64_GLOB_DAT _ITM_registerTMCloneTable
0000000000003fd0 R_X86_64_JUMP_SLOT _Znwm@GLIBCXX_3.4
$ objdump -R example2-nopie
example2-nopie: file format elf64-x86-64
DYNAMIC RELOCATION RECORDS
OFFSET TYPE VALUE
0000000000403ff0 R_X86_64_GLOB_DAT __libc_start_main@GLIBC_2.2.5
0000000000403ff8 R_X86_64_GLOB_DAT __gmon_start__
0000000000404018 R_X86_64_JUMP_SLOT _Znwm@GLIBCXX_3.4
-fPIC: generate position-independent code for library
-fPIE, -fpie: generate position-independent code for executable
-pie: link position-independent executable
-shared: link shared library
/* foo is defined in some other module; how the compiler reaches it differs
   between -fPIC (via GOT entry) and -fPIE (direct RIP-relative access),
   as the two disassemblies below show. */
extern int foo;
/* Returns the current value of the external global foo. */
int example() {return foo;}
with -fPIC:
0000000000000000 <example>:
0: 48 8b 05 00 00 00 00 mov 0x0(%rip),%rax # 7 <example+0x7>
3: R_X86_64_REX_GOTPCRELX foo-0x4
7: 8b 00 mov (%rax),%eax
9: c3 ret
with -fPIE:
0000000000000000 <example>:
0: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 6 <example+0x6>
2: R_X86_64_PC32 foo-0x4
6: c3 ret
saw two different relocations for global int foo:
R_X86_64_PC32 relocation = 32-bit offset to where foo is
R_X86_64_REX_GOTPCRELX relocation = 32-bit offset to global offset table entry containing foo's address
(foo's location decided at runtime by the dynamic linker)
Payer, ‘‘Too much PIE is bad for performance’’, ETH Zurich Tech Report
DYNAMICBASE linker option