''' Purpose: introduce web data acquisition -- print contents of the file
            word-of-the-day
        from web folder
            http://www.cs.virginia.edu/~cs1112/datasets/words/
'''

# need help to get web data - so import the capability
from urllib.request import urlopen

# urllib is a Python built-in library
# The word "module" or "library" basically means a collection of predefined functions and variables
# that we can access and use. (Someone else wrote these libraries, and they can be imported
# and used!)
# So basically urllib.request is the module we want to get a function from, and the function
# we want to use is urlopen
# urlopen is a function that can access a link (a website link)

# IMPORTANT CONSTANTS
# Web folder where Professor Cohoon keeps a collection of datasets. It ends
# with a slash, so a file name can be attached directly to the end of it.
CS1112_WORDS_WEB_FOLDER = 'http://www.cs.virginia.edu/~cs1112/datasets/words/'

# Name of the file we want from that web folder
FILE_NAME = 'word-of-the-day'

# get a link to file of interest: the folder followed by the file name, i.e.,
# 'http://www.cs.virginia.edu/~cs1112/datasets/words/word-of-the-day'
link = '{}{}'.format( CS1112_WORDS_WEB_FOLDER, FILE_NAME )

# Think about how on your computer you can go into folders and get the specific files you want.
# Same idea! We are just accessing an online web folder and picking out a specific file,
# written as folder/file

# get a connection to stream the web resource of interest
# The with statement closes the connection for us as soon as the indented
# code finishes (even if reading fails), so the connection is never left open.
with urlopen( link ) as stream :
    # read stream to get its encoded contents (a bytes object)
    page = stream.read()

print( 'page=', page ) # Now we get the version of what's on the webpage as it looks like to the COMPUTER not to you
# So the page looks something like b'ineffable\n' (the word changes; the b means bytes)

# decode page into plain text form (UTF-8) - UTF-8 is basically our letters and numbers
# Naming the encoding makes the choice explicit; 'utf-8' is also what decode() uses by default.
text = page.decode( 'utf-8' ) # Take the page and make it readable to us as text (creates a string of what's on the webpage)
# text looks something like 'ineffable\n' - the newline at the end is still there

# clean up text to get the word
word = text.strip() # strip() removes the whitespace (like that newline) from both ends of the string

# print word of the day
print( 'word of the day:', word )