| time | Alice | Bob |
|---|---|---|
| 3:00 | look in fridge. no milk | |
| 3:05 | leave for store | |
| 3:10 | arrive at store | look in fridge. no milk |
| 3:15 | buy milk | leave for store |
| 3:20 | return home, put milk in fridge | arrive at store |
| 3:25 | | buy milk |
| 3:30 | | return home, put milk in fridge |
leave a note: ‘‘I am buying milk’’
\(\approx\) setting/checking a variable (e.g. ‘‘note = 1’’)
if (no milk) {
if (no note) {
leave note;
buy milk;
remove note;
}
}
| Alice | Bob |
|---|---|
| if (no milk) { | |
| if (no note) { | |
| | if (no milk) { |
| | if (no note) { |
| leave note; | |
| buy milk; | |
| remove note; | |
| | leave note; |
| | buy milk; |
| | remove note; |
| } | |
| } | |
| | } |
| | } |
leave note;
if (no milk) {
if (no note) {
buy milk;
}
}
remove note;
intuition: label notes so Alice knows which is hers (and vice-versa)
is there a solution with writing/reading notes?
// the_lock: 0 = not taken, 1 = taken
// acquire: spin until the caller atomically changes the_lock from 0 to 1.
acquire:
    movl $1, %eax            // %eax <- 1
    lock xchg %eax, the_lock // swap %eax and the_lock
                             // sets the_lock to 1 (taken)
                             // sets %eax to prior val. of the_lock
    test %eax, %eax          // if the_lock wasn't 0 before:
    jne acquire              //     someone else holds it; try again
    ret                      // prior value was 0: the lock is now ours
// release: mark the_lock as not taken so that a spinning acquire can succeed.
release:
mfence // for memory order reasons; the fence comes before the store that frees the lock
movl $0, the_lock // then, set the_lock to 0 (not taken)
ret
finished: .quad 0
ThreadFinish:
_________________________
ret
ThreadWaitForFinish:
_________________________
lock xchg %eax, finished
cmp $0, %eax
____ ThreadWaitForFinish
ret
| | |
|---|---|
| A. mfence; mov $1, finished | B. mov $1, finished; mfence |
| C. mov $0, %eax | D. mov $1, %eax |
| E. je | F. jne |
finished: .quad 0
ThreadFinish:
__________A______________
ret
ThreadWaitForFinish: /* or without using a writing instruction: */
_________C______________ mov finished, %eax
lock xchg %eax, finished mfence
cmp $0, %eax cmp $0, %eax
__E_ ThreadWaitForFinish je ThreadWaitForFinish
ret ret
| | |
|---|---|
| A. mfence; mov $1, finished | B. mov $1, finished; mfence |
| C. mov $0, %eax | D. mov $1, %eax |
| E. je | F. jne |
lock abstraction is not powerful enough
spinlocks waste CPU time more than needed
spinlocks can send a lot of messages on the shared bus
while(xchg(&lk->locked, 1) != 0)
;
want: locks that wait better
instead of running infinite loop, give away CPU
lock = go to sleep, add self to list
unlock = wake up sleeping thread
/* Mutex whose waiters go to sleep on a queue instead of spinning. */
struct Mutex {
SpinLock guard_spinlock; /* protects lock_taken and wait_queue; only held for a very short time */
bool lock_taken = false; /* tracks whether any thread has locked and not yet unlocked */
WaitQueue wait_queue; /* threads that found the lock taken and are waiting for it to be free (not runnable) */
};
/* Acquire m. If it is already taken, queue the current thread and give up the CPU. */
LockMutex(Mutex *m) {
LockSpinlock(&m->guard_spinlock);
if (m->lock_taken) {
/* lock is held by someone else: wait without spinning */
put current thread on m->wait_queue
mark current thread as waiting
/* xv6: myproc()->state = SLEEPING; */
UnlockSpinlock(&m->guard_spinlock);
/* subtle: UnlockMutex may run here on another core and mark this thread
   runnable; the scheduler on that core must not switch to this thread while
   it is still running here (it would resume with an old copy of the registers) */
run scheduler (context switch)
/* lock_taken is not set on this path: UnlockMutex leaves it true and hands
   the lock to the thread it removes from the queue */
} else {
m->lock_taken = true;
UnlockSpinlock(&m->guard_spinlock);
}
}
/* Release m: hand it to one waiting thread if there is one, otherwise mark it free. */
UnlockMutex(Mutex *m) {
LockSpinlock(&m->guard_spinlock);
if (m->wait_queue not empty) {
/* instead of setting lock_taken to false, choose a thread to hand the lock off to */
remove a thread from m->wait_queue
mark thread as no longer waiting
/* xv6: set the removed thread's state to RUNNABLE.
   NOTE(review): this hint previously read myproc()->state = RUNNABLE, but
   LockMutex uses myproc() for the *current* thread, which is not the thread
   being woken here -- confirm. */
} else {
m->lock_taken = false;
}
UnlockSpinlock(&m->guard_spinlock);
}
spinlock protecting lock_taken and wait_queue
only held for very short amount of time (compared to mutex itself)
tracks whether any thread has locked and not unlocked
list of threads that discovered lock is taken
and are waiting for it to be free
these threads are not runnable
instead of setting lock_taken to false
choose thread to hand-off lock to
subtly: if UnlockMutex runs here on another core
need to make sure scheduler on the other core doesn’t switch to thread
while it is still running (would ‘clone’ thread/mess up registers)
| core 0 (thread A) | core 1 (thread B) |
|---|---|
| start LockMutex | |
| acquire spinlock | |
| discover lock taken | |
| enqueue thread A | |
| thread A set not runnable | |
| release spinlock | start UnlockMutex |
| | thread A set runnable |
| | finish UnlockMutex |
| | run scheduler |
| | scheduler switches to A |
| | … with old version of registers |
| thread A runs scheduler | … |
| … finally saving registers | … |
‘normal’ mutex uncontended case:
intuition: context switch only happens on interrupt
solution: disable them
x86 instructions:
cli — disable interrupts
sti — enable interrupts
Lock() {
disable interrupts;
}
Unlock() {
enable interrupts;
}
Lock(some_lock);
while (true) {}
Lock(some_lock);
read from disk
/* waits forever for (disabled) interrupt
from disk IO finishing */
Lock() {
disable interrupts;
}
Unlock() {
enable interrupts;
}
Lock(milk_lock);
if (no milk) {
Lock(store_lock);
buy milk
Unlock(store_lock);
/* interrupts enabled here?? */
}
Unlock(milk_lock);
test-and-set problem: cache block ‘‘ping-pongs’’ between caches
each transfer of block sends messages on bus
… so bus can’t be used for real work
/*
 * Test-and-test-and-set acquire (pseudo-code).
 *
 * Waits with plain atomic reads while the lock is taken, and only attempts
 * the atomic test-and-set once the lock looks free. the_lock is 0 when the
 * lock is not taken.
 */
acquire(int *the_lock) {
    do {
        /* spin, reading only, for as long as the lock is still taken */
        while (ATOMIC-READ(the_lock) != 0) { /* try again */ }
        /* lock looked free, but another processor may grab it first,
           so the test-and-set below can still fail and send us back */
    } while (ATOMIC-TEST-AND-SET(the_lock) == ALREADY_SET);
}
// acquire (test-and-test-and-set): spin on a plain read first, and only
// attempt the atomic swap once the lock reads as 0.
acquire:
cmp $0, the_lock // test the lock non-atomically
// unlike lock xchg --- keeps lock in Shared state!
jne acquire // try again (still locked)
// lock possibly free
// but another processor might lock
// before we get a chance to
// ... so try with atomic swap:
movl $1, %eax // %eax <- 1
lock xchg %eax, the_lock // swap %eax and the_lock
// sets the_lock to 1
// sets %eax to prior value of the_lock
test %eax, %eax // if the_lock wasn't 0 (someone else got it first):
jne acquire // try again
ret
can still have a lot of attempts to modify locks after unlocked
there are other spinlock designs that avoid this
futex — fast userspace mutex
futex(&lock_value, FUTEX_WAIT, expected_value, ...);
check if lock_value is expected_value
if so, sleep until futex(…, FUTEX_WAKE, …) is called
futex(&lock_value, FUTEX_WAKE, num_processes);
wake up num_processes which called FUTEX_WAIT
int lock_value; // UNLOCKED or LOCKED_NO_WAITERS or LOCKED_WAITERS
/*
 * Futex-based lock (pseudo-code).
 * lock_value is one of UNLOCKED, LOCKED_NO_WAITERS, LOCKED_WAITERS.
 * Loops until the UNLOCKED -> LOCKED_NO_WAITERS transition succeeds.
 */
Lock() {
    for (;;) {
        /* fast path: lock was free, take it */
        if (CompareAndSwap(&lock_value, UNLOCKED, LOCKED_NO_WAITERS) == SET) {
            /* acquired lock */
            return;
        }
        /* lock is held: advertise a waiter, then wait while the value
           is still LOCKED_WAITERS */
        if (CompareAndSwap(&lock_value, LOCKED_NO_WAITERS, LOCKED_WAITERS) == SET) {
            futex(&lock_value, FUTEX_WAIT, LOCKED_WAITERS, ...);
        }
        /* either woken up or lost a race: start over */
    }
}
/*
 * Futex-based unlock (pseudo-code).
 * If nobody advertised waiting, the compare-and-swap alone releases the lock.
 * Otherwise the lock is released and FUTEX_WAKE is issued with a count of 1.
 */
Unlock() {
    if (CompareAndSwap(&lock_value, LOCKED_NO_WAITERS, UNLOCKED) != SET) {
        /* value was not LOCKED_NO_WAITERS: release, then wake */
        lock_value = UNLOCKED;
        futex(&lock_value, FUTEX_WAKE, 1, ...);
    }
}
hashtable: address \(\rightarrow\) queue of waiting threads
use hashtable to look-up queue
lock queue
check value hasn’t changed
add thread to queue
set thread as WAITING (not runnable)
unlock queue
call scheduler
woken up — queue used to set RUNNABLE
so far — everything on spinlocks
spinlocks are pretty ‘unfair’
last CPU that held spinlock more likely to get it again
but there are many other ways to implement spinlocks…
unsigned int serving_number; // "now serving": Lock() waits until this equals its number
unsigned int next_number; // next number to hand out when a thread "takes a number"
Lock() {
// "take a number"
unsigned int my_number = atomic_read_and_increment(&next_number);
// wait until "now serving" that number
while (atomic_read(&serving_number) != my_number) {
/* do nothing */
}
// MISSING: code to prevent reordering reads/writes
}
/* Ticket lock release: advance "now serving" so the thread holding the next number can proceed. */
Unlock() {
// serve next number
serving_number += 1;
// MISSING: code to prevent reordering reads/writes
// NOTE(review): the other release paths in this file place the fence *before*
// the store that frees the lock -- confirm where the missing code belongs.
}
still have contention to write next_number
… but no retrying writes!
threads loop performing a read repeatedly while waiting
Linux kernel used to use ticket spinlocks
now uses variant of MCS spinlocks — locks have linked-list queue!
still try
goal: even less contention
...
// Outline of spinlock acquire: interrupts are disabled first, then the
// (elided) multicore part runs.
acquire(struct spinlock *lk) {
pushcli(); // disable interrupts to avoid deadlock
... /* this part basically just for multicore */
}
// Outline of spinlock release: the (elided) multicore part runs first, then
// the pushcli() from acquire is undone.
release(struct spinlock *lk)
{
... /* this part basically just for multicore */
popcli(); // pairs with acquire's pushcli(); enables interrupts only if that pushcli disabled them
}
pushcli / popcli — need to be in pairs
pushcli — disable interrupts if not already
popcli — enable interrupts if corresponding pushcli disabled them
// Disable interrupts. Calls nest: the interrupt state seen by the outermost
// pushcli is remembered in the per-CPU structure so the matching popcli can
// decide whether to enable interrupts again.
void
pushcli(void)
{
  int eflags;

  eflags = readeflags();     // read the flags before interrupts are turned off
  cli();
  if (mycpu()->ncli == 0)    // outermost pushcli: record the prior interrupt flag
    mycpu()->intena = eflags & FL_IF;
  mycpu()->ncli += 1;        // one more popcli is now owed
}
popcli(void)
{
if(readeflags()&FL_IF)
panic("popcli - interruptible");
if(--@2mycpu()2@->ncli < 0)
panic("popcli");
if(@2mycpu()2@->ncli == 0 && @2mycpu()2@->@3intena3@)
sti();
}
// Acquire the spinlock: disable interrupts, spin on an atomic exchange until
// the previous value of lk->locked was 0, then issue a memory barrier.
// Lines shown as "..." are elided in this excerpt.
void
acquire(struct spinlock *lk)
{
pushcli(); // disable interrupts to avoid deadlock.
...
// The xchg is atomic.
// Loop until the exchange returns 0, i.e. the lock was free when we set it to 1.
while(xchg(&lk->locked, 1) != 0)
;
// Tell the C compiler and the processor to not move loads or stores
// past this point, to ensure that the critical section's memory
// references happen after the lock is acquired.
__sync_synchronize();
...
}
void
release(struct spinlock *lk)
...
// Tell the C compiler and the processor to not move loads or stores
// past this point, to ensure that all the stores in the critical
// section are visible to other cores before the lock is released.
// Both the C compiler and the hardware may re-order loads and
// stores; __sync_synchronize() tells them both not to.
__sync_synchronize();
// Release the lock, equivalent to lk->locked = 0.
// This code can't use a C assignment, since it might
// not be atomic. A real OS would use C atomics here.
asm volatile("movl $0, %0" : "+m" (lk->locked) : );
popcli();
}
// Debugging-related parts of acquire. Lines shown as "..." are elided in
// this excerpt.
void acquire(struct spinlock *lk) {
  ...
  // Panic if holding(lk) already reports the lock as held.
  if(holding(lk))
    panic("acquire");
  ...
  // Record info about lock acquisition for debugging.
  lk->cpu = mycpu();
  getcallerpcs(&lk, lk->pcs);
}
// Debugging-related parts of release. Lines shown as "..." are elided in
// this excerpt.
void release(struct spinlock *lk) {
// Panic if holding(lk) does not report the lock as held.
if(!holding(lk))
panic("release");
// Clear the debugging info that acquire recorded (lk->pcs, lk->cpu).
lk->pcs[0] = 0;
lk->cpu = 0;
...
}