pushq %rbx
sub $0x20,%rsp
/* copy value from thread-local storage */
mov %fs:40,%rax
/* onto the stack */
mov %rax,0x18(%rsp)
/* clear register holding value */
xor %eax, %eax
...
...
/* copy value back from stack */
mov 0x18(%rsp),%rax
/* xor to compare */
xor %fs:40,%rax
/* if result non-zero, do not return */
jne call_stack_chk_fail
ret
call_stack_chk_fail:
call __stack_chk_fail
pushq %rbx
sub $0x20,%rsp
/* copy value from thread-local storage */
mov %fs:40,%rax
/* onto the stack */
mov %rax,0x18(%rsp)
/* clear register holding value */
xor %eax, %eax
...
...
/* copy value back from stack */
mov 0x18(%rsp),%rax
/* xor to compare */
xor %fs:40,%rax
/* if result non-zero, do not return */
jne call_stack_chk_fail
ret
call_stack_chk_fail:
call __stack_chk_fail
%fs:40 loaded with “canary” value
setup at program start
pushq %rbx
sub $0x20,%rsp
/* copy value from thread-local storage */
mov %fs:40,%rax
/* onto the stack */
mov %rax,0x18(%rsp)
/* clear register holding value */
xor %eax, %eax
...
...
/* copy value back from stack */
mov 0x18(%rsp),%rax
/* xor to compare */
xor %fs:40,%rax
/* if result non-zero, do not return */
jne call_stack_chk_fail
ret
call_stack_chk_fail:
call __stack_chk_fail
value copied to stack just below return address
pushq %rbx
sub $0x20,%rsp
/* copy value from thread-local storage */
mov %fs:40,%rax
/* onto the stack */
mov %rax,0x18(%rsp)
/* clear register holding value */
xor %eax, %eax
...
...
/* copy value back from stack */
mov 0x18(%rsp),%rax
/* xor to compare */
xor %fs:40,%rax
/* if result non-zero, do not return */
jne call_stack_chk_fail
ret
call_stack_chk_fail:
call __stack_chk_fail
trying to avoid info disclosure:
get canary value out of %rax
as soon as possible
\0 (string terminator)
%s won’t print it
\xFF
‘‘StackGuard’’ — 1998 paper proposing strategy
GCC: command-line options
-fstack-protector
-fstack-protector-strong
-fstack-protector-all
Microsoft C/C++ compiler: /GS
less than 1% runtime overhead if added to ‘‘risky’’ functions
large overhead if added to all functions
similar space overheads
(for typical applications)
stack canary — simplest of many mitigations
key idea: detect corruption of return address
assumption: if return address changed, so is adjacent token
assumption: attacker can’t learn true value of token
to set return address to 0x123456789, set what scores to what values?
0x123456789 =
|
0x0000 0001 2345 6789 as little-endian bytes =
|
89 67 45 23 01 00 00 00
|
[89 67 45 23] [01 00 00 00]
|
0x23456789 0x1
|
0x23456789), score 9 to 1
value was stored on the stack
running this program (input in bold):
get
67890123
aaaaaaaa
You input aaaaaaaa
whatever was on stack
struct foo {
char buffer[8];
long *numbers;
};
void process(struct foo* thing) {
...
scanf("%s", thing->buffer);
...
printf("first number: %ld\n", thing->numbers[0]);
}
input: aaaaaaaa(address of canary)
p (‘‘left over’’ from prior use of register, etc.) is stored at the same address as a ‘leftover’ copy of the 8-byte stack canary. If 999999,44444,333333 is output, how do we compute the stack canary value?
from Chiueh and Hsu, ‘‘RAD: A Compile-Time Solution to Buffer Overflow Attacks’’ (2001)
problem with stack: easy to leak address/values because used for lots of data
goal: keep sensitive data in separate region
function:
movq (%rsp), %rax // RAX <- return address
addq $-8, %r15 // R15 <- R15 - 8
movq %rax, (%r15) // M[R15] <- RAX
...
movq (%rsp), %rdx // RDX <- return address
cmpq %rdx, (%r15)
jne CRASH_THE_PROGRAM // if RDX != M[R15] goto CRASH_THE_PROGRAM
add $8, %r15 // R15 <- R15 + 8
ret
addq $-8, %r15
leaq after_call(%rip), %rax
movq %rax, (%r15)
jmp function
after_call:
function:
...
addq $8, %r15 // R15 <- R15 + 8
jmp *-8(%r15) // jmp M[R15-8]
via https://clang.llvm.org/docs/ShadowCallStack.html (see also https://security.googleblog.com/2019/10/protecting-against-code-reuse-in-linux_30.html)
dedicate register x18 to shadow stack pointer
ARM call instruction saves return address in register…
str x30, [x18], #8
stp x29, x30, [sp, #-16]!
mov x29, sp
bl bar
add w0, w0, #1
ldp x29, x30, [sp], #16
ldr x30, [x18, #-8]!
ret
stp x29, x30, [sp, #-16]!
mov x29, sp
bl bar
add w0, w0, #1
ldp x29, x30, [sp], #16
ret
-fsanitize=shadowcallstack
recent Intel processor extension adds shadow stacks
new shadow stack pointer
CALL/RET: push/pop from BOTH stacks
shadow stack also protected from writes by hardware + OS
if we change how CALL/RET works…
… maybe we can add shadow stack support to existing programs?
void Foo() {
try {
... Bar() ...
} catch (std::runtime_error &error) {
...
}
}
void Bar() {
... Quux() ...
}
void Quux() {
...
throw std::runtime_error("...");
...
}
jmp_buf env;
const char *error;
void Foo() {
if (0 == setjmp(env)) {
Bar();
} else {
...
}
}
void Bar() {
... Quux() ...
}
void Quux() {
...
error = "...";
longjmp(env, 1);
...
}
exceptions and setjmp/longjmp deliberately skip return calls
one solution: ‘‘direct’’ shadow stack
fixed (possibly secret) offset from normal stack
shadow stack only stores return addresses
Intel CET has instructions to manipulate shadow stack pointer
RDSSP (read shadow stack pointer)
INCSSP (increment shadow stack pointer)
ARM64 scheme: prevent writes if
Intel CET: prevent writes unless
can we prevent writes without relying on avoiding info leaks…
and without special hardware support?
combined with an information leak that can dump arbitrary bytes of memory,
which of these exploits would shadow stacks stop…