''' Purpose: introduce web data acquisition -- print contents of
        word-of-the-day.txt from http://www.cs.virginia.edu/~cs1112/datasets/words/
'''

# need help to get web data - so import the capability
from urllib.request import urlopen

# IMPORTANT CONSTANTS

# Web folder that holds the word datasets. Every dataset URL is built by
# adding a file name to the end of this folder URL.
CS1112_WORDS_WEB_FOLDER = "http://www.cs.virginia.edu/~cs1112/datasets/words/"

# Name of the specific file (not a folder) we want from that web folder.
# NOTE(review): the module docstring refers to word-of-the-day.txt, but this
# name has no extension -- confirm which name the server actually expects.
FILE_NAME = "word-of-the-day"

# get a link to file of interest by concatenating the folder URL and file name
link = CS1112_WORDS_WEB_FOLDER + FILE_NAME

# get a connection to stream the web resource of interest; the with-statement
# closes the connection for us once the read is finished, even if the read
# raises an error, so the connection is never left open
with urlopen( link ) as stream:
    # read stream to get the contents of the page -- this gives raw bytes,
    # not text
    contents = stream.read()

# decode contents into plain text form: .decode( 'UTF-8' ) interprets the raw
# bytes as UTF-8 encoded characters and returns an ordinary string
text = contents.decode( 'UTF-8' )

# clean up text to get the word: .strip() is called on a string and returns a
# new string without the leading and trailing whitespace (spaces, tabs,
# newlines); the original string is left unchanged
word = text.strip()

# print word of the day
print( word )

# You don't have to add \n yourself: .strip() already removed any newline character (\n) that came with the
# file's text, and print() ends its output with a newline by default.
# Printing the text without stripping it would show whether the file contains newline characters or additional spaces.