"I am a person who works hard and plays hard."

Yuan Wei
Second Year Graduate Student Department of Computer Science
University of Virginia Charlottesville, VA 22903
Email: yw3f@cs.virginia.edu


Source Code Analysis

Main Page   Compound List   File List   Compound Members   File Members  

anagram.c File Reference

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <setjmp.h>

Include dependency graph for alpha/src/anagram.c:

Include dependency graph

Go to the source code of this file.

Compounds

Defines

Typedefs

Functions

Variables


Define Documentation

#define ALPHABET   26
 

Definition at line 158 of file alpha/src/anagram.c.

Referenced by BuildMask(), BuildWord(), Debug(), and SortCandidates().

#define bigmalloc   malloc
 

Definition at line 178 of file alpha/src/anagram.c.

#define Cdecl
 

Definition at line 182 of file alpha/src/anagram.c.

Referenced by CompareFrequency(), and main().

#define ch2i(ch)   ((ch)-'a')
 

Definition at line 159 of file alpha/src/anagram.c.

#define Debug(x)
 

Definition at line 190 of file alpha/src/anagram.c.

#define far
 

Definition at line 176 of file alpha/src/anagram.c.

 
#define HaltProcessing()   0
 

Definition at line 181 of file alpha/src/anagram.c.

#define huge
 

Definition at line 175 of file alpha/src/anagram.c.

#define i2ch(ch)   ((ch)+'a')
 

Definition at line 160 of file alpha/src/anagram.c.

#define lPhrase(ch)   alPhrase[ch2i(ch)]
 

Definition at line 225 of file alpha/src/anagram.c.

#define MASK_BITS   32
 

Definition at line 150 of file alpha/src/anagram.c.

Referenced by BuildMask().

#define MAX_QUADS   2
 

Definition at line 152 of file alpha/src/anagram.c.

Referenced by BuildMask(), BuildWord(), and FindAnagram().

#define MAXCAND   5000
 

Definition at line 155 of file alpha/src/anagram.c.

Referenced by NextWord().

#define MAXSOL   51
 

Definition at line 156 of file alpha/src/anagram.c.

#define MAXWORDS   26000
 

Definition at line 154 of file alpha/src/anagram.c.

Referenced by ReadDict().

#define OneStep(i)
 

Value:

if ((aqNext[i] = pqMask[i] - pw->aqMask[i]) & aqMainSign[i]) { \
        ppwStart++; \
        continue; \
    }

Definition at line 491 of file alpha/src/anagram.c.

#define smallmalloc   malloc
 

Definition at line 179 of file alpha/src/anagram.c.

#define smallmallocfail   (char *)0
 

Definition at line 180 of file alpha/src/anagram.c.

#define Stat(x)
 

Definition at line 196 of file alpha/src/anagram.c.

#define StringFormat   "%15s%c"
 

Definition at line 177 of file alpha/src/anagram.c.

Referenced by DumpCandidates().

#define Zero(t)   memset(t, 0, sizeof(t))
 

Definition at line 243 of file alpha/src/anagram.c.


Typedef Documentation

typedef Letter* PLetter
 

Definition at line 222 of file alpha/src/anagram.c.

typedef Word* * PPWord
 

Definition at line 206 of file alpha/src/anagram.c.

typedef Word* PWord
 

Definition at line 205 of file alpha/src/anagram.c.

typedef unsigned long Quad
 

Definition at line 149 of file alpha/src/anagram.c.

Referenced by BuildMask(), BuildWord(), Debug(), and FindAnagram().


Function Documentation

void AddWords(void)
 

Definition at line 440 of file alpha/src/anagram.c.

References BuildWord(), cchMinLength, cchPhraseLength, cpwCand, and pchDictionary.

00440                {
00441     char * pch = pchDictionary;     /* walk through the dictionary */
00442 
00443     cpwCand = 0;
00444 
00445     while (*pch) {
00446         if ((pch[1] >= cchMinLength && pch[1]+cchMinLength <= cchPhraseLength)
00447             || pch[1] == cchPhraseLength)
00448             BuildWord(pch+2);
00449         pch += *pch;
00450     }
00451 
00452     fprintf(stdout, "%d candidates\n", cpwCand);
00453 }

void BuildMask(char *pchPhrase)
 

Definition at line 307 of file alpha/src/anagram.c.

References ALPHABET, aqMainMask, aqMainSign, auGlobalFrequency, cchPhraseLength, Fatal(), Letter::iq, lPhrase, MASK_BITS, MAX_QUADS, Quad, Letter::uBits, Letter::uFrequency, and Letter::uShift.

00307                                  {
00308     int i;
00309     int ch;
00310     unsigned iq;                        /* which Quad? */
00311     int cbtUsed;                        /* bits used in the current Quad */
00312     int cbtNeed;                        /* bits needed for current letter */
00313     Quad qNeed;                         /* used to build the mask */
00314 
00315     bzero(alPhrase, sizeof(Letter)*ALPHABET);
00316     bzero(aqMainMask, sizeof(Quad)*MAX_QUADS);
00317     bzero(aqMainSign, sizeof(Quad)*MAX_QUADS);
00318 /*
00319     Zero(alPhrase);
00320     Zero(aqMainMask);
00321     Zero(aqMainSign);
00322 */
00323 
00324     /* Tabulate letter frequencies in the phrase */
00325     cchPhraseLength = 0;
00326     while ((ch = *pchPhrase++) != '\0') {
00327         if (isalpha(ch)) {
00328             ch = tolower(ch);
00329             lPhrase(ch).uFrequency++;
00330             cchPhraseLength++;
00331         }
00332     }
00333 
00334     /* Build  masks */
00335     iq = 0;                             /* which quad being used */
00336     cbtUsed = 0;                        /* bits used so far */
00337 
00338     for (i = 0; i < ALPHABET; i++) {
00339         if (alPhrase[i].uFrequency == 0) {
00340             auGlobalFrequency[i] = ~0;  /* to make it sort last */
00341         } else {
00342             auGlobalFrequency[i] = 0;
00343             for (cbtNeed = 1, qNeed = 1;
00344                  alPhrase[i].uFrequency >= qNeed;
00345                  cbtNeed++, qNeed <<= 1);
00346             if (cbtUsed + cbtNeed > MASK_BITS) {
00347                 if (++iq >= MAX_QUADS)
00348                     Fatal("MAX_QUADS not large enough\n", 0);
00349                 cbtUsed = 0;
00350             }
00351             alPhrase[i].uBits = qNeed-1;
00352             if (cbtUsed)
00353                 qNeed <<= cbtUsed;
00354             aqMainSign[iq] |= qNeed;
00355             aqMainMask[iq] |= (Quad)alPhrase[i].uFrequency << cbtUsed;
00356             alPhrase[i].uShift = cbtUsed;
00357             alPhrase[i].iq = iq;
00358             cbtUsed += cbtNeed;
00359         }
00360     }
00361 }

void BuildWord(char *pchWord)
 

Definition at line 399 of file alpha/src/anagram.c.

References ALPHABET, Word::aqMask, auGlobalFrequency, Word::cchLength, ch2i, Debug, Letter::iq, MAX_QUADS, NextWord(), Word::pchWord, Quad, Letter::uFrequency, Letter::uShift, and wprint().

00399                                {
00400     unsigned char cchFrequency[ALPHABET];
00401     int i;
00402     char * pch = pchWord;
00403     PWord pw;
00404     int cchLength = 0;
00405 
00406     bzero(cchFrequency, sizeof(unsigned char)*ALPHABET);
00407     /* Zero(cchFrequency); */
00408 
00409     /* Build frequency table */
00410     while ((i = *pch++) != '\0') {
00411         if (!isalpha(i)) continue;
00412         i = ch2i(tolower(i));
00413         if (++cchFrequency[i] > alPhrase[i].uFrequency)
00414             return;
00415         ++cchLength;
00416     }
00417 
00418     Debug(wprint(pchWord);)
00419 
00420     /* Update global count */
00421     for (i = 0; i < ALPHABET; i++)
00422         auGlobalFrequency[i] += cchFrequency[i];
00423 
00424     /* Create a Word structure and fill it in, including building the
00425      * bitfield of frequencies.
00426      */
00427     pw = NextWord();
00428     bzero(pw->aqMask, sizeof(Quad)*MAX_QUADS);
00429     /* Zero(pw->aqMask); */
00430     pw->pchWord = pchWord;
00431     pw->cchLength = cchLength;
00432     for (i = 0; i < ALPHABET; i++) {
00433         pw->aqMask[alPhrase[i].iq] |=
00434             (Quad)cchFrequency[i] << alPhrase[i].uShift;
00435     }
00436 }

int Cdecl CompareFrequency(char *pch1, char *pch2)
 

Definition at line 577 of file alpha/src/anagram.c.

References auGlobalFrequency, and Cdecl.

Referenced by SortCandidates().

00577                                                    {
00578     return auGlobalFrequency[*pch1] < auGlobalFrequency[*pch2]
00579         ?  -1 :
00580            auGlobalFrequency[*pch1] == auGlobalFrequency[*pch2]
00581         ?   0 : 1;
00582 }

Debug  
 

Definition at line 466 of file alpha/src/anagram.c.

References ALPHABET, cpwLast, Quad, and wprint().

/*
 * Debug-only code: the closing ")" at the end of this listing terminates a
 * Debug(...) macro argument, and the function header (original line 466) is
 * not part of the listing.  NOTE(review): FindAnagram() calls this as
 * DumpWord(pqMask), so pq is presumably a Quad array of packed letter
 * counts -- confirm against the full source.
 *
 * For every letter that occurs in the phrase, the letter's count is
 * extracted from pq and the letter is printed that many times; a single
 * space is printed at the end.
 */
00467                          {
00468     int i;
00469     Quad q;
00470     for (i = 0; i < ALPHABET; i++) {
00471         if (alPhrase[i].uFrequency == 0) continue;
/* Fetch the Quad holding this letter's field, shift the field down to bit 0
 * and mask off everything above its value bits. */
00472         q = pq[alPhrase[i].iq];
00473         if (alPhrase[i].uShift) q >>= alPhrase[i].uShift;
00474         q &= alPhrase[i].uBits;
00475         while (q--) putchar('a'+i);
00476     }
00477     putchar(' ');
00478 }
00479 )                                       /* End of debug code */
00480 
00481 void DumpWords(void) {
00482     int i;
00483     for (i = 0; i < cpwLast; i++) wprint(apwSol[i]->pchWord);
00484     printf("\n");
00485 }

void DumpCandidates(void)
 

Definition at line 455 of file alpha/src/anagram.c.

References cpwCand, and StringFormat.

00455                           {
00456     unsigned u;
00457 
00458     for (u = 0; u < cpwCand; u++)
00459         printf(StringFormat, apwCand[u]->pchWord, (u % 4 == 3) ? '\n' : ' ');
00460     printf("\n");
00461 }

void Fatal(char *pchMsg, unsigned u)
 

Definition at line 246 of file alpha/src/anagram.c.

/*
 * Report a fatal error and terminate the program.
 *
 * pchMsg: printf-style format string; it is written to stdout.
 * u:      the single argument supplied to that format.
 *
 * Never returns: the process exits with status 1.
 */
void Fatal(char * pchMsg, unsigned u) {
    printf(pchMsg, u);
    exit(1);
}

void FindAnagram(Quad *pqMask, PPWord ppwStart, int iLetter)
 

Definition at line 499 of file alpha/src/anagram.c.

References achByFrequency, Word::aqMask, Word::cchLength, cchPhraseLength, cpwCand, cpwLast, Debug, FindAnagram(), HaltProcessing, i2ch, MAX_QUADS, OneStep, Quad, and Stat.

00500 {
00501     Quad aqNext[MAX_QUADS];
00502     register PWord pw;
00503     Quad qMask;
00504     unsigned iq;
00505     PPWord ppwEnd = &apwCand[0];
00506     ppwEnd += cpwCand;
00507 
00508     ;
00509 
00510     if (HaltProcessing()) longjmp(jbAnagram, 1);
00511 
00512     Debug(printf("Trying :"); DumpWord(pqMask); printf(":\n");)
00513 
00514     for (;;) {
00515         iq = alPhrase[achByFrequency[iLetter]].iq;
00516         qMask = alPhrase[achByFrequency[iLetter]].uBits <<
00517                 alPhrase[achByFrequency[iLetter]].uShift;
00518         if (pqMask[iq] & qMask) break;
00519         iLetter++;
00520     }
00521 
00522     Debug(printf("Pivoting on %c\n", i2ch(achByFrequency[iLetter]));)
00523 
00524     while (ppwStart < ppwEnd) {          /* Half of the program execution */
00525         pw = *ppwStart;                  /* time is spent in these three */
00526 
00527         Stat(if (++ulLowCount == 0) ++ulHighCount;)
00528 
00529 #if MAX_QUADS > 0
00530         OneStep(0);                     /* lines of code. */
00531 #endif
00532 
00533 #if MAX_QUADS > 1
00534         OneStep(1);
00535 #endif
00536 
00537 #if MAX_QUADS > 2
00538         OneStep(2);
00539 #endif
00540 
00541 #if MAX_QUADS > 3
00542         OneStep(3);
00543 #endif
00544 
00545 #if MAX_QUADS > 4
00546             @@"Add more unrolling steps here, please."@@
00547 #endif
00548 
00549         /* If the pivot letter isn't present, defer this word until later */
00550         if ((pw->aqMask[iq] & qMask) == 0) {
00551             *ppwStart = *--ppwEnd;
00552             *ppwEnd = pw;
00553             continue;
00554         }
00555 
00556         /* If we get here, this means the word fits. */
00557         apwSol[cpwLast++] = pw;
00558         if (cchPhraseLength -= pw->cchLength) { /* recurse */
00559             Debug(DumpWords();)
00560             /* The recursive call scrambles the tail, so we have to be
00561              * pessimistic.
00562              */
00563             ppwEnd = &apwCand[0];
00564             ppwEnd += cpwCand;
00565             FindAnagram(&aqNext[0],
00566                         ppwStart, iLetter);
00567         } else DumpWords();             /* found one */
00568         cchPhraseLength += pw->cchLength;
00569         --cpwLast;
00570         ppwStart++;
00571         continue;
00572     }
00573 
00574     ;
00575 }

char* GetPhrase(char *pch)
 

Definition at line 600 of file alpha/src/anagram.c.

00600                              {
00601     if (fInteractive) printf(">");
00602     fflush(stdout);
00603     if (gets(pch) == NULL) {
00604 #ifdef PLUS_STATS
00605         PrintDerefStats(stdout);
00606         PrintHeapSize(stdout);
00607 #endif /* PLUS_STATS */
00608         exit(0);
00609     }
00610     return(pch);
00611 }

int Cdecl main(int cpchArgc, char **ppchArgv)
 

Definition at line 615 of file alpha/src/anagram.c.

References achPhrase, AddWords(), aqMainMask, BuildMask(), cchMinLength, cchPhraseLength, Cdecl, cpwCand, cpwLast, DumpCandidates(), Fatal(), FindAnagram(), fInteractive, GetPhrase(), ReadDict(), SortCandidates(), and Stat.

00615                                               {
00616 
00617     if (cpchArgc != 2 && cpchArgc != 3)
00618         Fatal("Usage: anagram dictionary [length]\n", 0);
00619 
00620     if (cpchArgc == 3)
00621         cchMinLength = atoi(ppchArgv[2]);
00622 
00623     fInteractive = isatty(1);
00624 
00625     ReadDict(ppchArgv[1]);
00626 
00627     while (GetPhrase(&achPhrase[0]) != NULL) {
00628         if (isdigit(achPhrase[0])) {
00629             cchMinLength = atoi(achPhrase);
00630             printf("New length: %d\n", cchMinLength);
00631         } else if (achPhrase[0] == '?') {
00632             DumpCandidates();
00633         } else {
00634             BuildMask(&achPhrase[0]);
00635             AddWords();
00636             if (cpwCand == 0 || cchPhraseLength == 0) continue;
00637 
00638             Stat(ulHighCount = ulLowCount = 0;)
00639             cpwLast = 0;
00640             SortCandidates();
00641             if (setjmp(jbAnagram) == 0)
00642                 FindAnagram(&aqMainMask[0], &apwCand[0], 0);
00643             Stat(printf("%lu:%lu probes\n", ulHighCount, ulLowCount);)
00644         }
00645     }
00646     return 0;
00647 }

PWord NextWord(void)
 

Definition at line 385 of file alpha/src/anagram.c.

References cpwCand, Fatal(), MAXCAND, and NewWord.

00385                      {
00386     PWord pw;
00387     if (cpwCand >= MAXCAND)
00388         Fatal("Too many candidates\n", 0);
00389     pw = apwCand[cpwCand++];
00390     if (pw != NULL)
00391         return pw;
00392     apwCand[cpwCand-1] = NewWord();
00393     return apwCand[cpwCand-1];
00394 }

void ReadDict(char *pchFile)
 

Definition at line 263 of file alpha/src/anagram.c.

References Fatal(), MAXWORDS, and pchDictionary.

00263                              {
00264     FILE *fp;
00265     char * pch;
00266     char * pchBase;
00267     unsigned long ulLen;
00268     unsigned cWords = 0;
00269     unsigned cLetters;
00270     int ch;
00271     struct stat statBuf;
00272 
00273     if (stat(pchFile, &statBuf)) Fatal("Cannot stat dictionary\n", 0);
00274 
00275     ulLen = statBuf.st_size + 2 * (unsigned long)MAXWORDS;
00276     pchBase = pchDictionary = (char *)malloc(ulLen);
00277 
00278     if(pchDictionary == NULL)
00279         Fatal("Unable to allocate memory for dictionary\n", 0);
00280 
00281     if ((fp = fopen(pchFile, "r")) == NULL)
00282         Fatal("Cannot open dictionary\n", 0);
00283 
00284     while (!feof(fp)) {
00285         pch = pchBase+2;                /* reserve for length */
00286         cLetters = 0;
00287         while ((ch = fgetc(fp)) != '\n' && ch != EOF) {
00288             if (isalpha(ch)) cLetters++;
00289             *pch++ = ch;
00290         }
00291         *pch++ = '\0';
00292         *pchBase = pch - pchBase;
00293         pchBase[1] = cLetters;
00294         pchBase = pch;
00295         cWords++;
00296     }
00297     fclose(fp);
00298 
00299     *pchBase++ = 0;
00300 
00301     fprintf(stdout, "main dictionary has %u entries\n", cWords);
00302     if (cWords >= MAXWORDS)
00303         Fatal("Dictionary too large; increase MAXWORDS\n", 0);
00304     fprintf(stdout, "%lu bytes wasted\n", ulLen - (pchBase - pchDictionary));
00305 }

void SortCandidates(void)
 

Definition at line 584 of file alpha/src/anagram.c.

References achByFrequency, ALPHABET, CompareFrequency(), and i2ch.

00584                           {
00585     int i;
00586 
00587     /* Sort the letters by frequency */
00588     for (i = 0; i < ALPHABET; i++) achByFrequency[i] = i;
00589     qsort(achByFrequency, ALPHABET, sizeof(char),
00590           (int (*)(const void *, const void *))CompareFrequency);
00591 
00592     fprintf(stdout, "Order of search will be ");
00593     for (i = 0; i < ALPHABET; i++)
00594         fputc(i2ch(achByFrequency[i]), stdout);
00595     fputc('\n', stdout);
00596 }

Stat(unsigned long ulHighCount; unsigned long ulLowCount;)
 

void wprint(char *pch)
 

Definition at line 378 of file alpha/src/anagram.c.

/*
 * Write the string pch to stdout, followed by one space.
 */
void wprint(char * pch) {
    fprintf(stdout, "%s ", pch);
}


Variable Documentation

char achByFrequency[ALPHABET]
 

Definition at line 239 of file alpha/src/anagram.c.

Referenced by FindAnagram(), and SortCandidates().

char achPhrase[255]
 

Definition at line 613 of file alpha/src/anagram.c.

Referenced by main().

Letter alPhrase[ALPHABET]
 

Definition at line 224 of file alpha/src/anagram.c.

PWord apwCand[MAXCAND]
 

Definition at line 208 of file alpha/src/anagram.c.

PWord apwSol[MAXSOL]
 

Definition at line 463 of file alpha/src/anagram.c.

Quad aqMainMask[MAX_QUADS]
 

Definition at line 229 of file alpha/src/anagram.c.

Referenced by BuildMask(), and main().

Quad aqMainSign[MAX_QUADS]
 

Definition at line 230 of file alpha/src/anagram.c.

Referenced by BuildMask().

unsigned auGlobalFrequency[ALPHABET]
 

Definition at line 238 of file alpha/src/anagram.c.

Referenced by BuildMask(), BuildWord(), and CompareFrequency().

int cchMinLength = 3
 

Definition at line 232 of file alpha/src/anagram.c.

Referenced by AddWords(), and main().

int cchPhraseLength
 

Definition at line 227 of file alpha/src/anagram.c.

Referenced by AddWords(), BuildMask(), FindAnagram(), and main().

unsigned cpwCand
 

Definition at line 209 of file alpha/src/anagram.c.

Referenced by AddWords(), DumpCandidates(), FindAnagram(), main(), and NextWord().

int cpwLast
 

Definition at line 464 of file alpha/src/anagram.c.

Referenced by Debug(), FindAnagram(), and main().

int fInteractive
 

Definition at line 598 of file alpha/src/anagram.c.

Referenced by main().

PWord NewWord(void)
 

Definition at line 364 of file alpha/src/anagram.c.

00364               {
00365     PWord pw;
00366 
00367     pw = (Word *)malloc(sizeof(Word));
00368     if (pw == NULL)
00369         Fatal("Out of memory after %d candidates\n", cpwCand);
00370     return pw;
00371 }

PWord NextWord(void)
 

Definition at line 385 of file alpha/src/anagram.c.

00385                      {
00386     PWord pw;
00387     if (cpwCand >= MAXCAND)
00388         Fatal("Too many candidates\n", 0);
00389     pw = apwCand[cpwCand++];
00390     if (pw != NULL)
00391         return pw;
00392     apwCand[cpwCand-1] = NewWord();
00393     return apwCand[cpwCand-1];
00394 }

char* pchDictionary
 

Definition at line 241 of file alpha/src/anagram.c.

Referenced by AddWords(), and ReadDict().



UVa CS Department of Computer Science
School of Engineering, University of Virginia
151 Engineer's Way, P.O. Box 400740
Charlottesville, Virginia 22904-4740

(434) 982-2200  Fax: (434) 982-2214