// foo: add up the odd values seen while counting %edi down toward zero.
// x86-64, GAS AT&T syntax (op src, dst).
// Presumably `int foo(int n)` under the System V AMD64 ABI (argument in
// %edi, result in %eax) -- confirm against the caller.
//   In:    %edi = starting counter value
//   Out:   %eax = sum of every odd value %edi held at foo_loop_top
//   Clobb: %eax, %edi, flags
// The loop has a do-while shape: the body runs once before the counter is
// checked, and repeats while the decremented counter is still > 0 (signed).
.global foo
foo:
    xor %eax, %eax              // eax <- 0 (running sum)
foo_loop_top:
    test $0x1, %edi             // ZF <- ((edi & 1) == 0)
    je foo_loop_bottom          // if ((edi & 1) == 0) goto foo_loop_bottom
    add %edi, %eax              // edi is odd: eax += edi
foo_loop_bottom:
    dec %edi                    // edi = edi - 1; leaves flags for the jg below
    jg foo_loop_top             // if (edi > 0) goto foo_loop_top
    ret
two branches could have same hashed PC
nothing in table tells us about this
possibility 1: both branches usually taken
possibility 2: both branches usually not taken
possibility 3: one branch taken, one not taken
use 1-bit predictor on this loop
what is the conditional jump misprediction rate for i % 3 == 0? for i == 50? overall?
devote more space to storing history
main goal: rare exceptions don’t immediately change prediction
example: branch taken 99% of the time
1-bit predictor: wrong about 2% of the time
new predictor: wrong about 1% of the time
\(000 \leftrightarrow 001 \leftrightarrow 010 \leftrightarrow 011 \leftrightarrow 100 \leftrightarrow 101 \leftrightarrow 110 \leftrightarrow 111\)
000-011: not taken
100-111: taken
use 2-bit predictor on this loop
what is the conditional branch misprediction rate?
for (int i = 0; i < 5; ++i)
if (first or last item) {...}
call: increment index, save return address in that slot
ret: read prediction from index, decrement index
jmp LABEL is 1st instruction we fetch
LABEL for several cycles
jle LABEL instead…
jmp *%rax or jmp *(%rax, %rcx, 8) or call *%rax or …
i = 4;
do {
...
i -= 1;
} while (i != 0);
(T = taken, N = not taken)
easy cases:
just saw TTTTTT: predict T
just saw NNNNNN: predict N
just saw TNTNTN: predict T
hard cases:
TTNTTTT
(many more)
i = 10000;
do {
p = malloc(...);
if (p == NULL) goto error; // BRANCH 1
...
} while (i-- != 0); // BRANCH 2
i = 10000;
do {
if (i % 2 == 0) goto skip; // BRANCH 1
...
p = malloc(...);
if (p == NULL) goto error; // BRANCH 2
skip: ...
} while (i-- != 0); // BRANCH 3
i = 10000;
do {
if (A) goto one // BRANCH 1
...
one:
if (B) goto two // BRANCH 2
...
two:
if (A or B) goto three // BRANCH 3
...
if (A and B) goto three // BRANCH 4
...
three:
... // changes A, B
} while (i-- != 0);
for (int i = 0; i < 64; ++i)
...
loop count predictor idea: look for NNNNNNT+repeat (or TTTTTTN+repeat)
track for each possible loop branch:
known to be used on Intel
from McFarling, “Combining Branch Predictors” (1993)
from McFarling, “Combining Branch Predictors” (1993)
from McFarling, “Combining Branch Predictors” (1993)
branch target buffer
indirect branch predictor
what about conditional branches??? loops???