''' Purpose: produce spell-check corrected text
'''

# get access to url support
import url

# URL of the CS 1112 most common words resource
MOST_COMMON_URL = "http://www.cs.virginia.edu/~cs1112/words/most-common"

# URL of the CS 1112 word corrections CSV
CORRECTIONS_URL = "http://www.cs.virginia.edu/~cs1112/words/corrections.csv"

# get contents of the most common words url as one string; it is later split
# on whitespace to get the individual words
common_words_as_giant_string = url.get_text( MOST_COMMON_URL )

# get contents of the corrections url as a dataset; each row is later unpacked
# into exactly two values (a string and its associated known word)
# NOTE(review): presumably get_dataset parses the CSV into a list of rows -- confirm
dataset = url.get_dataset( CORRECTIONS_URL )

# get the user text to be checked
reply = input( "Enter text: " )

# convert reply into a list of words (split on whitespace, so any punctuation
# stays attached to its word)
text = reply.split()

# what now ???

# we want to be able to quickly determine whether a word is spelled correctly,
# so we need a collection of correct spellings.
#
# in checking for correct spelling we want to be forgiving -- allow a word to
# be in all uppercase or in title case (first letter of each word is
# capitalized).

# split the common-words text into a list of individual words
common_words = common_words_as_giant_string.split()

# build the collection of accepted spellings.
#
# a set is used rather than a list: in this script the collection is only
# used to ask "is this word in legal_spellings?", and a set answers that
# without scanning every entry. it also drops the duplicates that arise when
# a common word is already in title case or upper case.
#
# to be forgiving about case, each common word is accepted as given, in
# title case, and in all upper case.
legal_spellings = set()
for word in common_words:
    legal_spellings.update( ( word, word.title(), word.upper() ) )

# turn the corrections dataset into a dictionary, so that an unknown string
# can be checked for a known spelling correction with a single lookup.
#
# to be forgiving about case, every pair is recorded three times: as given,
# in title case, and in all upper case. the three forms are recorded in that
# order, so when two forms share a key the later one wins.

corrections = {}

# each row of the dataset is a string and its associated known word
for misspelling, known_word in dataset:
    corrections.update( [
        ( misspelling, known_word ),                    # as given
        ( misspelling.title(), known_word.title() ),    # title case
        ( misspelling.upper(), known_word.upper() ),    # upper case
    ] )

# ready to process user text: decide how each word should appear in the
# output, then glue the pieces together
pieces = []
for token in text:
    if token in legal_spellings:
        # correctly spelled -- keep it exactly as typed
        shown = token
    elif token in corrections:
        # known misspelling -- substitute the correction, starred as a warning
        shown = "*" + corrections[ token ] + "*"
    else:
        # unrecognized -- flag it with question marks
        shown = "?" + token + "?"

    # every word is followed by a single space (including the last one)
    pieces.append( shown + " " )

output = "".join( pieces )

# print the spell corrected version
print(output)