'''
Purpose: introduce web data acquisition -- print the word of the day
fetched from http://www.cs.virginia.edu/~cs1112/datasets/words/
'''

# urlopen() opens a connection to a web resource so its contents can be read
from urllib.request import urlopen

# IMPORTANT CONSTANTS

# Base URL of the CS1112 words dataset folder on the course website.
CS1112_WORDS_WEB_FOLDER = "http://www.cs.virginia.edu/~cs1112/datasets/words/"

# Name of the specific file (not a folder) inside the words folder that we
# want to read. It is appended to the folder URL to form the full link.
# NOTE(review): the original header refers to "word-of-the-day.txt" but this
# name has no extension -- confirm which name the server actually serves.
FILE_NAME = "word-of-the-day"

# Build the full link to the file of interest by concatenating folder + name.
link = CS1112_WORDS_WEB_FOLDER + FILE_NAME

# Open a connection to the web resource and read the whole page. The `with`
# block guarantees the connection is closed once reading is done, even if
# read() raises.
with urlopen(link) as stream:
    contents = stream.read()

# read() returns raw bytes; decode them as UTF-8 to get an ordinary string.
text = contents.decode('UTF-8')

# .strip() is called on a string and returns a copy with leading and trailing
# whitespace (including any trailing newline) removed.
word = text.strip()

# Print the word of the day. No explicit "\n" is needed: print() appends a
# newline by default, and any whitespace around the word was already removed
# by .strip() above.
print(word)