''' Purpose: produce spell-check corrected text

Reads one line of text from the user and prints it back word by word:
  - a word found in the common-words list (as given, in Title Case, or in
    UPPER CASE) is printed unchanged;
  - a word with a known correction is replaced by that correction and
    wrapped in *...* as a warning;
  - any other word is flagged by wrapping it in ?...?.
'''

# get access to url support
import url

# CS 1112 most common words
MOST_COMMON_URL = "http://www.cs.virginia.edu/~cs1112/words/most-common"

# CS 1112 words corrections CSV
CORRECTIONS_URL = "http://www.cs.virginia.edu/~cs1112/words/corrections.csv"

# get contents of most common words url
# (used below as one whitespace-separated string of words)
common_words_as_giant_string = url.get_text( MOST_COMMON_URL )

# get contents of corrections url
# (used below as an iterable of rows -- see the unpacking note further down)
dataset = url.get_dataset( CORRECTIONS_URL )

# get the user text to be checked
reply = input( "Enter text: " )

# convert reply into a list of words
text = reply.split()

# we want to be able to quickly determine whether a word is spelled correctly,
# so we need a collection of correct spellings.
#
# in checking for correctly spelled words we want to be forgiving -- allow a
# word to be in all uppercase or in title case (first letter of each word is
# capitalized).

# split the common words string into a list of words
common_words = common_words_as_giant_string.split()

# collect every accepted form of every common word. a set is used rather than
# a list so each membership test below is a hash lookup instead of a scan of
# the whole collection.
#
# NOTE: str.title() also capitalizes the letter after an apostrophe
# ("don't" -> "Don'T"), so such words are only accepted in their plain or
# all-uppercase forms.
legal_spellings = set()

for word in common_words:
    tword = word.title()    # get title form of word
    uword = word.upper()    # get upper case form of word

    # add plain, title, and upper case forms to legal spellings
    legal_spellings.update( ( word, tword, uword ) )

# want to convert corrections dataset into a dictionary so that we can easily
# check if an unknown string has a known spell correction
#
# in checking we want to be forgiving -- allow the string to also be in
# all uppercase or in title case; the correction is given in the same case
corrections = {}

# iterate over dataset
for row in dataset:
    # each row is a misspelling and its associated known word
    # assumes every row has exactly two cells -- the unpacking raises
    # ValueError otherwise; TODO confirm against the CSV
    s1, s2 = row

    corrections[ s1 ] = s2                      # plain form
    corrections[ s1.title() ] = s2.title()      # title form
    corrections[ s1.upper() ] = s2.upper()      # upper case form

# ready to process user text

# collect the output word by word; each piece carries its own trailing space
# so the joined string is exactly what per-word concatenation would produce
pieces = []

for word in text:
    if word in legal_spellings:
        # legally spelled word, use it as is
        out_word = word
    elif word in corrections:
        # it has a known correction, use that with a warning marker
        out_word = "*" + corrections[ word ] + "*"
    else:
        # otherwise, it needs to be flagged
        out_word = "?" + word + "?"

    pieces.append( out_word + " " )

output = "".join( pieces )

# print the spell corrected version
print( output )