"I am a person who works hard and plays hard."

Yuan Wei
Second Year Graduate Student Department of Computer Science
University of Virginia Charlottesville, VA 22903
Email: yw3f@cs.virginia.edu


Source Code Analysis

Main Page   Compound List   File List   Compound Members   File Members  

bpred.c

Go to the documentation of this file.
00001 /*
00002  * bpred.c - branch predictor routines
00003  *
00004  * This file is a part of the SimpleScalar tool suite written by
00005  * Todd M. Austin as a part of the Multiscalar Research Project.
00006  *  
00007  * The tool suite is currently maintained by Doug Burger and Todd M. Austin.
00008  * 
00009  * Copyright (C) 1994, 1995, 1996, 1997, 1998 by Todd M. Austin
00010  *
00011  * This source file is distributed "as is" in the hope that it will be
00012  * useful.  The tool set comes with no warranty, and no author or
00013  * distributor accepts any responsibility for the consequences of its
00014  * use. 
00015  * 
00016  * Everyone is granted permission to copy, modify and redistribute
00017  * this tool set under the following conditions:
00018  * 
00019  *    This source code is distributed for non-commercial use only. 
00020  *    Please contact the maintainer for restrictions applying to 
00021  *    commercial use.
00022  *
00023  *    Permission is granted to anyone to make or distribute copies
00024  *    of this source code, either as received or modified, in any
00025  *    medium, provided that all copyright notices, permission and
00026  *    nonwarranty notices are preserved, and that the distributor
00027  *    grants the recipient permission for further redistribution as
00028  *    permitted by this document.
00029  *
00030  *    Permission is granted to distribute this file in compiled
00031  *    or executable form under the same conditions that apply for
00032  *    source code, provided that either:
00033  *
00034  *    A. it is accompanied by the corresponding machine-readable
00035  *       source code,
00036  *    B. it is accompanied by a written offer, with no time limit,
00037  *       to give anyone a machine-readable copy of the corresponding
00038  *       source code in return for reimbursement of the cost of
00039  *       distribution.  This written offer must permit verbatim
00040  *       duplication by anyone, or
00041  *    C. it is distributed by someone who received only the
00042  *       executable form, and is accompanied by a copy of the
00043  *       written offer of source code that they received concurrently.
00044  *
00045  * In other words, you are welcome to use, share and improve this
00046  * source file.  You are forbidden to forbid anyone else to use, share
00047  * and improve what you give them.
00048  *
00049  * INTERNET: dburger@cs.wisc.edu
00050  * US Mail:  1210 W. Dayton Street, Madison, WI 53706
00051  *
00052  * $Id: bpred.c,v 1.1.1.1 2000/05/26 15:18:57 taustin Exp $
00053  *
00054  * $Log: bpred.c,v $
00055  * Revision 1.1.1.1  2000/05/26 15:18:57  taustin
00056  * SimpleScalar Tool Set
00057  *
00058  *
00059  * Revision 1.4  1998/08/27 07:56:23  taustin
00060  * implemented host interface description in host.h
00061  * return address stack (RAS) performance stats improved
00062  * explicit BTB sizing option added to branch predictors, use
00063  *       "-btb" option to configure BTB
00064  * added target configuration parameters to control branch
00065  *       predictor indexing
00066  * L2 index computation is more "compatible" to McFarling's
00067  *       version of it, i.e., if the PC xor address component is only
00068  *       part of the index, take the lower order address bits for the
00069  *       other part of the index, rather than the higher order ones
00070  * return address stack (RAS) bug fixed (improves pred perf)
00071  *
00072  * Revision 1.1.1.1  1997/05/22 00:33:18  aklauser
00073  *
00074  * Revision 1.11  1997/05/01 20:23:00  skadron
00075  * BTB bug fixes; jumps no longer update direction state; non-taken
00076  *    branches no longer update BTB
00077  *
00078  * Revision 1.10  1997/05/01 00:05:42  skadron
00079  * Separated BTB from direction-predictor
00080  *
00081  * Revision 1.9  1997/04/30  01:42:42  skadron
00082  * 1. Not aggressively returning the BTB target regardless of hit on jump's,
00083  *    but instead returning just "taken" when it's a BTB miss yields an
00084  *    apparent epsilon performance improvement for cc1 and perl.
00085  * 2. Bug fix: if no retstack, treat return's as any other jump
00086  *
00087  * Revision 1.8  1997/04/29  23:50:33  skadron
00088  * Added r31 info to distinguish between return-JRs and other JRs for bpred
00089  *
00090  * Revision 1.7  1997/04/29  22:53:04  skadron
00091  * Hopefully bpred is now right: bpred now allocates entries only for
00092  *    branches; on a BTB miss it still returns a direction; and it uses a
00093  *    return-address stack.  Returns are not yet distinguished among JR's
00094  *
00095  * Revision 1.6  1997/04/28  17:37:02  skadron
00096  * Bpred now allocates entries for any instruction instead of only
00097  *    branches; also added return-address stack
00098  *
00099  * Revision 1.5  1997/04/24  16:57:21  skadron
00100  * Bpred used to return no prediction if the indexing branch didn't match
00101  *    in the BTB.  Now it can predict a direction even on a BTB address
00102  *    conflict
00103  *
00104  * Revision 1.4  1997/03/27  16:31:52  skadron
00105  * Fixed bug: sim-outorder calls bpred_after_priming(), even if no bpred
00106  *    exists.  Now we check for a null ptr.
00107  *
00108  * Revision 1.3  1997/03/25  16:16:33  skadron
00109  * Statistics now take account of priming: statistics report only
00110  *    post-prime info.
00111  *
00112  * Revision 1.2  1997/02/24  18:02:41  skadron
00113  * Fixed output format of a formula stat
00114  *
00115  * Revision 1.1  1997/02/16  22:23:54  skadron
00116  * Initial revision
00117  *
00118  *
00119  */
00120 
00121 #include <stdio.h>
00122 #include <stdlib.h>
00123 #include <math.h>
00124 #include <assert.h>
00125 
00126 #include "host.h"
00127 #include "misc.h"
00128 #include "machine.h"
00129 #include "bpred.h"
00130 
00131 /* turn this on to enable the SimpleScalar 2.0 RAS bug */
00132 /* #define RAS_BUG_COMPATIBLE */
00133 
00134 /* create a branch predictor */
00135 struct bpred_t *                        /* branch predictory instance */
00136 bpred_create(enum bpred_class class,    /* type of predictor to create */
00137              unsigned int bimod_size,   /* bimod table size */
00138              unsigned int l1size,       /* 2lev l1 table size */
00139              unsigned int l2size,       /* 2lev l2 table size */
00140              unsigned int meta_size,    /* meta table size */
00141              unsigned int shift_width,  /* history register width */
00142              unsigned int xor,          /* history xor address flag */
00143              unsigned int btb_sets,     /* number of sets in BTB */ 
00144              unsigned int btb_assoc,    /* BTB associativity */
00145              unsigned int retstack_size) /* num entries in ret-addr stack */
00146 {
00147   struct bpred_t *pred;
00148 
00149   if (!(pred = calloc(1, sizeof(struct bpred_t))))
00150     fatal("out of virtual memory");
00151 
00152   pred->class = class;
00153 
00154   switch (class) {
00155   case BPredComb:
00156     /* bimodal component */
00157     pred->dirpred.bimod = 
00158       bpred_dir_create(BPred2bit, bimod_size, 0, 0, 0);
00159 
00160     /* 2-level component */
00161     pred->dirpred.twolev = 
00162       bpred_dir_create(BPred2Level, l1size, l2size, shift_width, xor);
00163 
00164     /* metapredictor component */
00165     pred->dirpred.meta = 
00166       bpred_dir_create(BPred2bit, meta_size, 0, 0, 0);
00167 
00168     break;
00169 
00170   case BPred2Level:
00171     pred->dirpred.twolev = 
00172       bpred_dir_create(class, l1size, l2size, shift_width, xor);
00173 
00174     break;
00175 
00176   case BPred2bit:
00177     pred->dirpred.bimod = 
00178       bpred_dir_create(class, bimod_size, 0, 0, 0);
00179 
00180   case BPredTaken:
00181   case BPredNotTaken:
00182     /* no other state */
00183     break;
00184 
00185   default:
00186     panic("bogus predictor class");
00187   }
00188 
00189   /* allocate ret-addr stack */
00190   switch (class) {
00191   case BPredComb:
00192   case BPred2Level:
00193   case BPred2bit:
00194     {
00195       int i;
00196 
00197       /* allocate BTB */
00198       if (!btb_sets || (btb_sets & (btb_sets-1)) != 0)
00199         fatal("number of BTB sets must be non-zero and a power of two");
00200       if (!btb_assoc || (btb_assoc & (btb_assoc-1)) != 0)
00201         fatal("BTB associativity must be non-zero and a power of two");
00202 
00203       if (!(pred->btb.btb_data = calloc(btb_sets * btb_assoc,
00204                                         sizeof(struct bpred_btb_ent_t))))
00205         fatal("cannot allocate BTB");
00206 
00207       pred->btb.sets = btb_sets;
00208       pred->btb.assoc = btb_assoc;
00209 
00210       if (pred->btb.assoc > 1)
00211         for (i=0; i < (pred->btb.assoc*pred->btb.sets); i++)
00212           {
00213             if (i % pred->btb.assoc != pred->btb.assoc - 1)
00214               pred->btb.btb_data[i].next = &pred->btb.btb_data[i+1];
00215             else
00216               pred->btb.btb_data[i].next = NULL;
00217             
00218             if (i % pred->btb.assoc != pred->btb.assoc - 1)
00219               pred->btb.btb_data[i+1].prev = &pred->btb.btb_data[i];
00220           }
00221 
00222       /* allocate retstack */
00223       if ((retstack_size & (retstack_size-1)) != 0)
00224         fatal("Return-address-stack size must be zero or a power of two");
00225       
00226       pred->retstack.size = retstack_size;
00227       if (retstack_size)
00228         if (!(pred->retstack.stack = calloc(retstack_size, 
00229                                             sizeof(struct bpred_btb_ent_t))))
00230           fatal("cannot allocate return-address-stack");
00231       pred->retstack.tos = retstack_size - 1;
00232       
00233       break;
00234     }
00235 
00236   case BPredTaken:
00237   case BPredNotTaken:
00238     /* no other state */
00239     break;
00240 
00241   default:
00242     panic("bogus predictor class");
00243   }
00244 
00245   return pred;
00246 }
00247 
00248 /* create a branch direction predictor */
00249 struct bpred_dir_t *            /* branch direction predictor instance */
00250 bpred_dir_create (
00251   enum bpred_class class,       /* type of predictor to create */
00252   unsigned int l1size,          /* level-1 table size */
00253   unsigned int l2size,          /* level-2 table size (if relevant) */
00254   unsigned int shift_width,     /* history register width */
00255   unsigned int xor)             /* history xor address flag */
00256 {
00257   struct bpred_dir_t *pred_dir;
00258   unsigned int cnt;
00259   int flipflop;
00260 
00261   if (!(pred_dir = calloc(1, sizeof(struct bpred_dir_t))))
00262     fatal("out of virtual memory");
00263 
00264   pred_dir->class = class;
00265 
00266   cnt = -1;
00267   switch (class) {
00268   case BPred2Level:
00269     {
00270       if (!l1size || (l1size & (l1size-1)) != 0)
00271         fatal("level-1 size, `%d', must be non-zero and a power of two", 
00272               l1size);
00273       pred_dir->config.two.l1size = l1size;
00274       
00275       if (!l2size || (l2size & (l2size-1)) != 0)
00276         fatal("level-2 size, `%d', must be non-zero and a power of two", 
00277               l2size);
00278       pred_dir->config.two.l2size = l2size;
00279       
00280       if (!shift_width || shift_width > 30)
00281         fatal("shift register width, `%d', must be non-zero and positive",
00282               shift_width);
00283       pred_dir->config.two.shift_width = shift_width;
00284       
00285       pred_dir->config.two.xor = xor;
00286       pred_dir->config.two.shiftregs = calloc(l1size, sizeof(int));
00287       if (!pred_dir->config.two.shiftregs)
00288         fatal("cannot allocate shift register table");
00289       
00290       pred_dir->config.two.l2table = calloc(l2size, sizeof(unsigned char));
00291       if (!pred_dir->config.two.l2table)
00292         fatal("cannot allocate second level table");
00293 
00294       /* initialize counters to weakly this-or-that */
00295       flipflop = 1;
00296       for (cnt = 0; cnt < l2size; cnt++)
00297         {
00298           pred_dir->config.two.l2table[cnt] = flipflop;
00299           flipflop = 3 - flipflop;
00300         }
00301 
00302       break;
00303     }
00304 
00305   case BPred2bit:
00306     if (!l1size || (l1size & (l1size-1)) != 0)
00307       fatal("2bit table size, `%d', must be non-zero and a power of two", 
00308             l1size);
00309     pred_dir->config.bimod.size = l1size;
00310     if (!(pred_dir->config.bimod.table =
00311           calloc(l1size, sizeof(unsigned char))))
00312       fatal("cannot allocate 2bit storage");
00313     /* initialize counters to weakly this-or-that */
00314     flipflop = 1;
00315     for (cnt = 0; cnt < l1size; cnt++)
00316       {
00317         pred_dir->config.bimod.table[cnt] = flipflop;
00318         flipflop = 3 - flipflop;
00319       }
00320 
00321     break;
00322 
00323   case BPredTaken:
00324   case BPredNotTaken:
00325     /* no other state */
00326     break;
00327 
00328   default:
00329     panic("bogus branch direction predictor class");
00330   }
00331 
00332   return pred_dir;
00333 }
00334 
00335 /* print branch direction predictor configuration */
00336 void
00337 bpred_dir_config(
00338   struct bpred_dir_t *pred_dir, /* branch direction predictor instance */
00339   char name[],                  /* predictor name */
00340   FILE *stream)                 /* output stream */
00341 {
00342   switch (pred_dir->class) {
00343   case BPred2Level:
00344     fprintf(stream,
00345       "pred_dir: %s: 2-lvl: %d l1-sz, %d bits/ent, %s xor, %d l2-sz, direct-mapped\n",
00346       name, pred_dir->config.two.l1size, pred_dir->config.two.shift_width,
00347       pred_dir->config.two.xor ? "" : "no", pred_dir->config.two.l2size);
00348     break;
00349 
00350   case BPred2bit:
00351     fprintf(stream, "pred_dir: %s: 2-bit: %d entries, direct-mapped\n",
00352       name, pred_dir->config.bimod.size);
00353     break;
00354 
00355   case BPredTaken:
00356     fprintf(stream, "pred_dir: %s: predict taken\n", name);
00357     break;
00358 
00359   case BPredNotTaken:
00360     fprintf(stream, "pred_dir: %s: predict not taken\n", name);
00361     break;
00362 
00363   default:
00364     panic("bogus branch direction predictor class");
00365   }
00366 }
00367 
00368 /* print branch predictor configuration */
00369 void
00370 bpred_config(struct bpred_t *pred,      /* branch predictor instance */
00371              FILE *stream)              /* output stream */
00372 {
00373   switch (pred->class) {
00374   case BPredComb:
00375     bpred_dir_config (pred->dirpred.bimod, "bimod", stream);
00376     bpred_dir_config (pred->dirpred.twolev, "2lev", stream);
00377     bpred_dir_config (pred->dirpred.meta, "meta", stream);
00378     fprintf(stream, "btb: %d sets x %d associativity", 
00379             pred->btb.sets, pred->btb.assoc);
00380     fprintf(stream, "ret_stack: %d entries", pred->retstack.size);
00381     break;
00382 
00383   case BPred2Level:
00384     bpred_dir_config (pred->dirpred.twolev, "2lev", stream);
00385     fprintf(stream, "btb: %d sets x %d associativity", 
00386             pred->btb.sets, pred->btb.assoc);
00387     fprintf(stream, "ret_stack: %d entries", pred->retstack.size);
00388     break;
00389 
00390   case BPred2bit:
00391     bpred_dir_config (pred->dirpred.bimod, "bimod", stream);
00392     fprintf(stream, "btb: %d sets x %d associativity", 
00393             pred->btb.sets, pred->btb.assoc);
00394     fprintf(stream, "ret_stack: %d entries", pred->retstack.size);
00395     break;
00396 
00397   case BPredTaken:
00398     bpred_dir_config (pred->dirpred.bimod, "taken", stream);
00399     break;
00400   case BPredNotTaken:
00401     bpred_dir_config (pred->dirpred.bimod, "nottaken", stream);
00402     break;
00403 
00404   default:
00405     panic("bogus branch predictor class");
00406   }
00407 }
00408 
00409 /* print predictor stats */
00410 void
00411 bpred_stats(struct bpred_t *pred,       /* branch predictor instance */
00412             FILE *stream)               /* output stream */
00413 {
00414   fprintf(stream, "pred: addr-prediction rate = %f\n",
00415           (double)pred->addr_hits/(double)(pred->addr_hits+pred->misses));
00416   fprintf(stream, "pred: dir-prediction rate = %f\n",
00417           (double)pred->dir_hits/(double)(pred->dir_hits+pred->misses));
00418 }
00419 
00420 /* register branch predictor stats */
00421 void
00422 bpred_reg_stats(struct bpred_t *pred,   /* branch predictor instance */
00423                 struct stat_sdb_t *sdb) /* stats database */
00424 {
00425   char buf[512], buf1[512], *name;
00426 
00427   /* get a name for this predictor */
00428   switch (pred->class)
00429     {
00430     case BPredComb:
00431       name = "bpred_comb";
00432       break;
00433     case BPred2Level:
00434       name = "bpred_2lev";
00435       break;
00436     case BPred2bit:
00437       name = "bpred_bimod";
00438       break;
00439     case BPredTaken:
00440       name = "bpred_taken";
00441       break;
00442     case BPredNotTaken:
00443       name = "bpred_nottaken";
00444       break;
00445     default:
00446       panic("bogus branch predictor class");
00447     }
00448 
00449   sprintf(buf, "%s.lookups", name);
00450   stat_reg_counter(sdb, buf, "total number of bpred lookups",
00451                    &pred->lookups, 0, NULL);
00452   sprintf(buf, "%s.updates", name);
00453   sprintf(buf1, "%s.dir_hits + %s.misses", name, name);
00454   stat_reg_formula(sdb, buf, "total number of updates", buf1, "%12.0f");
00455   sprintf(buf, "%s.addr_hits", name);
00456   stat_reg_counter(sdb, buf, "total number of address-predicted hits", 
00457                    &pred->addr_hits, 0, NULL);
00458   sprintf(buf, "%s.dir_hits", name);
00459   stat_reg_counter(sdb, buf, 
00460                    "total number of direction-predicted hits "
00461                    "(includes addr-hits)", 
00462                    &pred->dir_hits, 0, NULL);
00463   if (pred->class == BPredComb)
00464     {
00465       sprintf(buf, "%s.used_bimod", name);
00466       stat_reg_counter(sdb, buf, 
00467                        "total number of bimodal predictions used", 
00468                        &pred->used_bimod, 0, NULL);
00469       sprintf(buf, "%s.used_2lev", name);
00470       stat_reg_counter(sdb, buf, 
00471                        "total number of 2-level predictions used", 
00472                        &pred->used_2lev, 0, NULL);
00473     }
00474   sprintf(buf, "%s.misses", name);
00475   stat_reg_counter(sdb, buf, "total number of misses", &pred->misses, 0, NULL);
00476   sprintf(buf, "%s.jr_hits", name);
00477   stat_reg_counter(sdb, buf,
00478                    "total number of address-predicted hits for JR's",
00479                    &pred->jr_hits, 0, NULL);
00480   sprintf(buf, "%s.jr_seen", name);
00481   stat_reg_counter(sdb, buf,
00482                    "total number of JR's seen",
00483                    &pred->jr_seen, 0, NULL);
00484   sprintf(buf, "%s.jr_non_ras_hits.PP", name);
00485   stat_reg_counter(sdb, buf,
00486                    "total number of address-predicted hits for non-RAS JR's",
00487                    &pred->jr_non_ras_hits, 0, NULL);
00488   sprintf(buf, "%s.jr_non_ras_seen.PP", name);
00489   stat_reg_counter(sdb, buf,
00490                    "total number of non-RAS JR's seen",
00491                    &pred->jr_non_ras_seen, 0, NULL);
00492   sprintf(buf, "%s.bpred_addr_rate", name);
00493   sprintf(buf1, "%s.addr_hits / %s.updates", name, name);
00494   stat_reg_formula(sdb, buf,
00495                    "branch address-prediction rate (i.e., addr-hits/updates)",
00496                    buf1, "%9.4f");
00497   sprintf(buf, "%s.bpred_dir_rate", name);
00498   sprintf(buf1, "%s.dir_hits / %s.updates", name, name);
00499   stat_reg_formula(sdb, buf,
00500                   "branch direction-prediction rate (i.e., all-hits/updates)",
00501                   buf1, "%9.4f");
00502   sprintf(buf, "%s.bpred_jr_rate", name);
00503   sprintf(buf1, "%s.jr_hits / %s.jr_seen", name, name);
00504   stat_reg_formula(sdb, buf,
00505                   "JR address-prediction rate (i.e., JR addr-hits/JRs seen)",
00506                   buf1, "%9.4f");
00507   sprintf(buf, "%s.bpred_jr_non_ras_rate.PP", name);
00508   sprintf(buf1, "%s.jr_non_ras_hits.PP / %s.jr_non_ras_seen.PP", name, name);
00509   stat_reg_formula(sdb, buf,
00510                    "non-RAS JR addr-pred rate (ie, non-RAS JR hits/JRs seen)",
00511                    buf1, "%9.4f");
00512   sprintf(buf, "%s.retstack_pushes", name);
00513   stat_reg_counter(sdb, buf,
00514                    "total number of address pushed onto ret-addr stack",
00515                    &pred->retstack_pushes, 0, NULL);
00516   sprintf(buf, "%s.retstack_pops", name);
00517   stat_reg_counter(sdb, buf,
00518                    "total number of address popped off of ret-addr stack",
00519                    &pred->retstack_pops, 0, NULL);
00520   sprintf(buf, "%s.used_ras.PP", name);
00521   stat_reg_counter(sdb, buf,
00522                    "total number of RAS predictions used",
00523                    &pred->used_ras, 0, NULL);
00524   sprintf(buf, "%s.ras_hits.PP", name);
00525   stat_reg_counter(sdb, buf,
00526                    "total number of RAS hits",
00527                    &pred->ras_hits, 0, NULL);
00528   sprintf(buf, "%s.ras_rate.PP", name);
00529   sprintf(buf1, "%s.ras_hits.PP / %s.used_ras.PP", name, name);
00530   stat_reg_formula(sdb, buf,
00531                    "RAS prediction rate (i.e., RAS hits/used RAS)",
00532                    buf1, "%9.4f");
00533 }
00534 
00535 void
00536 bpred_after_priming(struct bpred_t *bpred)
00537 {
00538   if (bpred == NULL)
00539     return;
00540 
00541   bpred->lookups = 0;
00542   bpred->addr_hits = 0;
00543   bpred->dir_hits = 0;
00544   bpred->used_ras = 0;
00545   bpred->used_bimod = 0;
00546   bpred->used_2lev = 0;
00547   bpred->jr_hits = 0;
00548   bpred->jr_seen = 0;
00549   bpred->misses = 0;
00550   bpred->retstack_pops = 0;
00551   bpred->retstack_pushes = 0;
00552   bpred->ras_hits = 0;
00553 }
00554 
/* index into the bimodal table of direction predictor PRED for branch
   address ADDR: the address shifted right by MD_BR_SHIFT is xor-ed with
   the address shifted right by 19, then masked to the table size (the mask
   assumes the size is a power of two); note ADDR is evaluated twice.
   was: ((baddr >> 16) ^ baddr) & (pred->dirpred.bimod.size-1) */
#define BIMOD_HASH(PRED, ADDR)                                          \
  (((PRED)->config.bimod.size - 1)                                      \
   & (((ADDR) >> MD_BR_SHIFT) ^ ((ADDR) >> 19)))
00558 
00559 /* predicts a branch direction */
00560 char *                                          /* pointer to counter */
00561 bpred_dir_lookup(struct bpred_dir_t *pred_dir,  /* branch dir predictor inst */
00562                  md_addr_t baddr)               /* branch address */
00563 {
00564   unsigned char *p = NULL;
00565 
00566   /* Except for jumps, get a pointer to direction-prediction bits */
00567   switch (pred_dir->class) {
00568     case BPred2Level:
00569       {
00570         int l1index, l2index;
00571 
00572         /* traverse 2-level tables */
00573         l1index = (baddr >> MD_BR_SHIFT) & (pred_dir->config.two.l1size - 1);
00574         l2index = pred_dir->config.two.shiftregs[l1index];
00575         if (pred_dir->config.two.xor)
00576           {
00577 #if 1
00578             /* this L2 index computation is more "compatible" to McFarling's
00579                verison of it, i.e., if the PC xor address component is only
00580                part of the index, take the lower order address bits for the
00581                other part of the index, rather than the higher order ones */
00582             l2index = (((l2index ^ (baddr >> MD_BR_SHIFT))
00583                         & ((1 << pred_dir->config.two.shift_width) - 1))
00584                        | ((baddr >> MD_BR_SHIFT)
00585                           << pred_dir->config.two.shift_width));
00586 #else
00587             l2index = l2index ^ (baddr >> MD_BR_SHIFT);
00588 #endif
00589           }
00590         else
00591           {
00592             l2index =
00593               l2index
00594                 | ((baddr >> MD_BR_SHIFT) << pred_dir->config.two.shift_width);
00595           }
00596         l2index = l2index & (pred_dir->config.two.l2size - 1);
00597 
00598         /* get a pointer to prediction state information */
00599         p = &pred_dir->config.two.l2table[l2index];
00600       }
00601       break;
00602     case BPred2bit:
00603       p = &pred_dir->config.bimod.table[BIMOD_HASH(pred_dir, baddr)];
00604       break;
00605     case BPredTaken:
00606     case BPredNotTaken:
00607       break;
00608     default:
00609       panic("bogus branch direction predictor class");
00610     }
00611 
00612   return (char *)p;
00613 }
00614 
00615 /* probe a predictor for a next fetch address, the predictor is probed
00616    with branch address BADDR, the branch target is BTARGET (used for
00617    static predictors), and OP is the instruction opcode (used to simulate
00618    predecode bits; a pointer to the predictor state entry (or null for jumps)
00619    is returned in *DIR_UPDATE_PTR (used for updating predictor state),
00620    and the non-speculative top-of-stack is returned in stack_recover_idx 
00621    (used for recovering ret-addr stack after mis-predict).  */
00622 md_addr_t                               /* predicted branch target addr */
00623 bpred_lookup(struct bpred_t *pred,      /* branch predictor instance */
00624              md_addr_t baddr,           /* branch address */
00625              md_addr_t btarget,         /* branch target if taken */
00626              enum md_opcode op,         /* opcode of instruction */
00627              int is_call,               /* non-zero if inst is fn call */
00628              int is_return,             /* non-zero if inst is fn return */
00629              struct bpred_update_t *dir_update_ptr, /* pred state pointer */
00630              int *stack_recover_idx)    /* Non-speculative top-of-stack;
00631                                          * used on mispredict recovery */
00632 {
00633   struct bpred_btb_ent_t *pbtb = NULL;
00634   int index, i;
00635 
00636   if (!dir_update_ptr)
00637     panic("no bpred update record");
00638 
00639   /* if this is not a branch, return not-taken */
00640   if (!(MD_OP_FLAGS(op) & F_CTRL))
00641     return 0;
00642 
00643   pred->lookups++;
00644 
00645   dir_update_ptr->dir.ras = FALSE;
00646   dir_update_ptr->pdir1 = NULL;
00647   dir_update_ptr->pdir2 = NULL;
00648   dir_update_ptr->pmeta = NULL;
00649   /* Except for jumps, get a pointer to direction-prediction bits */
00650   switch (pred->class) {
00651     case BPredComb:
00652       if ((MD_OP_FLAGS(op) & (F_CTRL|F_UNCOND)) != (F_CTRL|F_UNCOND))
00653         {
00654           char *bimod, *twolev, *meta;
00655           bimod = bpred_dir_lookup (pred->dirpred.bimod, baddr);
00656           twolev = bpred_dir_lookup (pred->dirpred.twolev, baddr);
00657           meta = bpred_dir_lookup (pred->dirpred.meta, baddr);
00658           dir_update_ptr->pmeta = meta;
00659           dir_update_ptr->dir.meta  = (*meta >= 2);
00660           dir_update_ptr->dir.bimod = (*bimod >= 2);
00661           dir_update_ptr->dir.twolev  = (*twolev >= 2);
00662           if (*meta >= 2)
00663             {
00664               dir_update_ptr->pdir1 = twolev;
00665               dir_update_ptr->pdir2 = bimod;
00666             }
00667           else
00668             {
00669               dir_update_ptr->pdir1 = bimod;
00670               dir_update_ptr->pdir2 = twolev;
00671             }
00672         }
00673       break;
00674     case BPred2Level:
00675       if ((MD_OP_FLAGS(op) & (F_CTRL|F_UNCOND)) != (F_CTRL|F_UNCOND))
00676         {
00677           dir_update_ptr->pdir1 =
00678             bpred_dir_lookup (pred->dirpred.twolev, baddr);
00679         }
00680       break;
00681     case BPred2bit:
00682       if ((MD_OP_FLAGS(op) & (F_CTRL|F_UNCOND)) != (F_CTRL|F_UNCOND))
00683         {
00684           dir_update_ptr->pdir1 =
00685             bpred_dir_lookup (pred->dirpred.bimod, baddr);
00686         }
00687       break;
00688     case BPredTaken:
00689       return btarget;
00690     case BPredNotTaken:
00691       if ((MD_OP_FLAGS(op) & (F_CTRL|F_UNCOND)) != (F_CTRL|F_UNCOND))
00692         {
00693           return baddr + sizeof(md_inst_t);
00694         }
00695       else
00696         {
00697           return btarget;
00698         }
00699     default:
00700       panic("bogus predictor class");
00701   }
00702 
00703   /*
00704    * We have a stateful predictor, and have gotten a pointer into the
00705    * direction predictor (except for jumps, for which the ptr is null)
00706    */
00707 
00708   /* record pre-pop TOS; if this branch is executed speculatively
00709    * and is squashed, we'll restore the TOS and hope the data
00710    * wasn't corrupted in the meantime. */
00711   if (pred->retstack.size)
00712     *stack_recover_idx = pred->retstack.tos;
00713   else
00714     *stack_recover_idx = 0;
00715 
00716   /* if this is a return, pop return-address stack */
00717   if (is_return && pred->retstack.size)
00718     {
00719       md_addr_t target = pred->retstack.stack[pred->retstack.tos].target;
00720       pred->retstack.tos = (pred->retstack.tos + pred->retstack.size - 1)
00721                            % pred->retstack.size;
00722       pred->retstack_pops++;
00723       dir_update_ptr->dir.ras = TRUE; /* using RAS here */
00724       return target;
00725     }
00726 
00727 #ifndef RAS_BUG_COMPATIBLE
00728   /* if function call, push return-address onto return-address stack */
00729   if (is_call && pred->retstack.size)
00730     {
00731       pred->retstack.tos = (pred->retstack.tos + 1)% pred->retstack.size;
00732       pred->retstack.stack[pred->retstack.tos].target = 
00733         baddr + sizeof(md_inst_t);
00734       pred->retstack_pushes++;
00735     }
00736 #endif /* !RAS_BUG_COMPATIBLE */
00737   
00738   /* not a return. Get a pointer into the BTB */
00739   index = (baddr >> MD_BR_SHIFT) & (pred->btb.sets - 1);
00740 
00741   if (pred->btb.assoc > 1)
00742     {
00743       index *= pred->btb.assoc;
00744 
00745       /* Now we know the set; look for a PC match */
00746       for (i = index; i < (index+pred->btb.assoc) ; i++)
00747         if (pred->btb.btb_data[i].addr == baddr)
00748           {
00749             /* match */
00750             pbtb = &pred->btb.btb_data[i];
00751             break;
00752           }
00753     }   
00754   else
00755     {
00756       pbtb = &pred->btb.btb_data[index];
00757       if (pbtb->addr != baddr)
00758         pbtb = NULL;
00759     }
00760 
00761   /*
00762    * We now also have a pointer into the BTB for a hit, or NULL otherwise
00763    */
00764 
00765   /* if this is a jump, ignore predicted direction; we know it's taken. */
00766   if ((MD_OP_FLAGS(op) & (F_CTRL|F_UNCOND)) == (F_CTRL|F_UNCOND))
00767     {
00768       return (pbtb ? pbtb->target : 1);
00769     }
00770 
00771   /* otherwise we have a conditional branch */
00772   if (pbtb == NULL)
00773     {
00774       /* BTB miss -- just return a predicted direction */
00775       return ((*(dir_update_ptr->pdir1) >= 2)
00776               ? /* taken */ 1
00777               : /* not taken */ 0);
00778     }
00779   else
00780     {
00781       /* BTB hit, so return target if it's a predicted-taken branch */
00782       return ((*(dir_update_ptr->pdir1) >= 2)
00783               ? /* taken */ pbtb->target
00784               : /* not taken */ 0);
00785     }
00786 }
00787 
00788 /* Speculative execution can corrupt the ret-addr stack.  So for each
00789  * lookup we return the top-of-stack (TOS) at that point; a mispredicted
00790  * branch, as part of its recovery, restores the TOS using this value --
00791  * hopefully this uncorrupts the stack. */
00792 void
00793 bpred_recover(struct bpred_t *pred,     /* branch predictor instance */
00794               md_addr_t baddr,          /* branch address */
00795               int stack_recover_idx)    /* Non-speculative top-of-stack;
00796                                          * used on mispredict recovery */
00797 {
00798   if (pred == NULL)
00799     return;
00800 
00801   pred->retstack.tos = stack_recover_idx;
00802 }
00803 
00804 /* update the branch predictor, only useful for stateful predictors; updates
00805    entry for instruction type OP at address BADDR.  BTB only gets updated
00806    for branches which are taken.  Inst was determined to jump to
00807    address BTARGET and was taken if TAKEN is non-zero.  Predictor 
00808    statistics are updated with result of prediction, indicated by CORRECT and 
00809    PRED_TAKEN, predictor state to be updated is indicated by *DIR_UPDATE_PTR 
00810    (may be NULL for jumps, which shouldn't modify state bits).  Note if
00811    bpred_update is done speculatively, branch-prediction may get polluted. */
00812 void
00813 bpred_update(struct bpred_t *pred,      /* branch predictor instance */
00814              md_addr_t baddr,           /* branch address */
00815              md_addr_t btarget,         /* resolved branch target */
00816              int taken,                 /* non-zero if branch was taken */
00817              int pred_taken,            /* non-zero if branch was pred taken */
00818              int correct,               /* was earlier addr prediction ok? */
00819              enum md_opcode op,         /* opcode of instruction */
00820              struct bpred_update_t *dir_update_ptr)/* pred state pointer */
00821 {
00822   struct bpred_btb_ent_t *pbtb = NULL;
00823   struct bpred_btb_ent_t *lruhead = NULL, *lruitem = NULL;
00824   int index, i;
00825 
00826   /* don't change bpred state for non-branch instructions or if this
00827    * is a stateless predictor*/
00828   if (!(MD_OP_FLAGS(op) & F_CTRL))
00829     return;
00830 
00831   /* Have a branch here */
00832 
00833   if (correct)
00834     pred->addr_hits++;
00835 
00836   if (!!pred_taken == !!taken)
00837     pred->dir_hits++;
00838   else
00839     pred->misses++;
00840 
00841   if (dir_update_ptr->dir.ras)
00842     {
00843       pred->used_ras++;
00844       if (correct)
00845         pred->ras_hits++;
00846     }
00847   else if ((MD_OP_FLAGS(op) & (F_CTRL|F_COND)) == (F_CTRL|F_COND))
00848     {
00849       if (dir_update_ptr->dir.meta)
00850         pred->used_2lev++;
00851       else
00852         pred->used_bimod++;
00853     }
00854 
00855   /* keep stats about JR's; also, but don't change any bpred state for JR's
00856    * which are returns unless there's no retstack */
00857   if (MD_IS_INDIR(op))
00858     {
00859       pred->jr_seen++;
00860       if (correct)
00861         pred->jr_hits++;
00862       
00863       if (!dir_update_ptr->dir.ras)
00864         {
00865           pred->jr_non_ras_seen++;
00866           if (correct)
00867             pred->jr_non_ras_hits++;
00868         }
00869       else
00870         {
00871           /* return that used the ret-addr stack; no further work to do */
00872           return;
00873         }
00874     }
00875 
00876   /* Can exit now if this is a stateless predictor */
00877   if (pred->class == BPredNotTaken || pred->class == BPredTaken)
00878     return;
00879 
00880   /* 
00881    * Now we know the branch didn't use the ret-addr stack, and that this
00882    * is a stateful predictor 
00883    */
00884 
00885 #ifdef RAS_BUG_COMPATIBLE
00886   /* if function call, push return-address onto return-address stack */
00887   if (MD_IS_CALL(op) && pred->retstack.size)
00888     {
00889       pred->retstack.tos = (pred->retstack.tos + 1)% pred->retstack.size;
00890       pred->retstack.stack[pred->retstack.tos].target = 
00891         baddr + sizeof(md_inst_t);
00892       pred->retstack_pushes++;
00893     }
00894 #endif /* RAS_BUG_COMPATIBLE */
00895 
00896   /* update L1 table if appropriate */
00897   /* L1 table is updated unconditionally for combining predictor too */
00898   if ((MD_OP_FLAGS(op) & (F_CTRL|F_UNCOND)) != (F_CTRL|F_UNCOND) &&
00899       (pred->class == BPred2Level || pred->class == BPredComb))
00900     {
00901       int l1index, shift_reg;
00902       
00903       /* also update appropriate L1 history register */
00904       l1index =
00905         (baddr >> MD_BR_SHIFT) & (pred->dirpred.twolev->config.two.l1size - 1);
00906       shift_reg =
00907         (pred->dirpred.twolev->config.two.shiftregs[l1index] << 1) | (!!taken);
00908       pred->dirpred.twolev->config.two.shiftregs[l1index] =
00909         shift_reg & ((1 << pred->dirpred.twolev->config.two.shift_width) - 1);
00910     }
00911 
00912   /* find BTB entry if it's a taken branch (don't allocate for non-taken) */
00913   if (taken)
00914     {
00915       index = (baddr >> MD_BR_SHIFT) & (pred->btb.sets - 1);
00916       
00917       if (pred->btb.assoc > 1)
00918         {
00919           index *= pred->btb.assoc;
00920           
00921           /* Now we know the set; look for a PC match; also identify
00922            * MRU and LRU items */
00923           for (i = index; i < (index+pred->btb.assoc) ; i++)
00924             {
00925               if (pred->btb.btb_data[i].addr == baddr)
00926                 {
00927                   /* match */
00928                   assert(!pbtb);
00929                   pbtb = &pred->btb.btb_data[i];
00930                 }
00931               
00932               dassert(pred->btb.btb_data[i].prev 
00933                       != pred->btb.btb_data[i].next);
00934               if (pred->btb.btb_data[i].prev == NULL)
00935                 {
00936                   /* this is the head of the lru list, ie current MRU item */
00937                   dassert(lruhead == NULL);
00938                   lruhead = &pred->btb.btb_data[i];
00939                 }
00940               if (pred->btb.btb_data[i].next == NULL)
00941                 {
00942                   /* this is the tail of the lru list, ie the LRU item */
00943                   dassert(lruitem == NULL);
00944                   lruitem = &pred->btb.btb_data[i];
00945                 }
00946             }
00947           dassert(lruhead && lruitem);
00948           
00949           if (!pbtb)
00950             /* missed in BTB; choose the LRU item in this set as the victim */
00951             pbtb = lruitem;     
00952           /* else hit, and pbtb points to matching BTB entry */
00953           
00954           /* Update LRU state: selected item, whether selected because it
00955            * matched or because it was LRU and selected as a victim, becomes 
00956            * MRU */
00957           if (pbtb != lruhead)
00958             {
00959               /* this splices out the matched entry... */
00960               if (pbtb->prev)
00961                 pbtb->prev->next = pbtb->next;
00962               if (pbtb->next)
00963                 pbtb->next->prev = pbtb->prev;
00964               /* ...and this puts the matched entry at the head of the list */
00965               pbtb->next = lruhead;
00966               pbtb->prev = NULL;
00967               lruhead->prev = pbtb;
00968               dassert(pbtb->prev || pbtb->next);
00969               dassert(pbtb->prev != pbtb->next);
00970             }
00971           /* else pbtb is already MRU item; do nothing */
00972         }
00973       else
00974         pbtb = &pred->btb.btb_data[index];
00975     }
00976       
00977   /* 
00978    * Now 'p' is a possibly null pointer into the direction prediction table, 
00979    * and 'pbtb' is a possibly null pointer into the BTB (either to a 
00980    * matched-on entry or a victim which was LRU in its set)
00981    */
00982 
00983   /* update state (but not for jumps) */
00984   if (dir_update_ptr->pdir1)
00985     {
00986       if (taken)
00987         {
00988           if (*dir_update_ptr->pdir1 < 3)
00989             ++*dir_update_ptr->pdir1;
00990         }
00991       else
00992         { /* not taken */
00993           if (*dir_update_ptr->pdir1 > 0)
00994             --*dir_update_ptr->pdir1;
00995         }
00996     }
00997 
00998   /* combining predictor also updates second predictor and meta predictor */
00999   /* second direction predictor */
01000   if (dir_update_ptr->pdir2)
01001     {
01002       if (taken)
01003         {
01004           if (*dir_update_ptr->pdir2 < 3)
01005             ++*dir_update_ptr->pdir2;
01006         }
01007       else
01008         { /* not taken */
01009           if (*dir_update_ptr->pdir2 > 0)
01010             --*dir_update_ptr->pdir2;
01011         }
01012     }
01013 
01014   /* meta predictor */
01015   if (dir_update_ptr->pmeta)
01016     {
01017       if (dir_update_ptr->dir.bimod != dir_update_ptr->dir.twolev)
01018         {
01019           /* we only update meta predictor if directions were different */
01020           if (dir_update_ptr->dir.twolev == (unsigned int)taken)
01021             {
01022               /* 2-level predictor was correct */
01023               if (*dir_update_ptr->pmeta < 3)
01024                 ++*dir_update_ptr->pmeta;
01025             }
01026           else
01027             {
01028               /* bimodal predictor was correct */
01029               if (*dir_update_ptr->pmeta > 0)
01030                 --*dir_update_ptr->pmeta;
01031             }
01032         }
01033     }
01034 
01035   /* update BTB (but only for taken branches) */
01036   if (pbtb)
01037     {
01038       /* update current information */
01039       dassert(taken);
01040 
01041       if (pbtb->addr == baddr)
01042         {
01043           if (!correct)
01044             pbtb->target = btarget;
01045         }
01046       else
01047         {
01048           /* enter a new branch in the table */
01049           pbtb->addr = baddr;
01050           pbtb->op = op;
01051           pbtb->target = btarget;
01052         }
01053     }
01054 }


UVa CS Department of Computer Science
School of Engineering, University of Virginia
151 Engineer's Way, P.O. Box 400740
Charlottesville, Virginia 22904-4740

(434) 982-2200  Fax: (434) 982-2214