# Pattern for turning a csv file stored in a web repository into a dataset
# (a list of rows, where each row is a list of strings).

# need access to python web resources
import urllib.request

# specify the location of the data repository
BASE_URL = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/'

# get the name of the dataset
reply = input( 'Enter the name of a dataset: ' )
name = reply.strip()

# Specify where in the repository we can find the dataset
web_file = BASE_URL + name
print( web_file )

# Connect the program to the web resource. The with statement closes the
# connection as soon as the read is done, even if the read raises an error.
with urllib.request.urlopen( web_file ) as stream:
    # read the web resource, which is a stream of data (bytes)
    contents = stream.read()

# Decode the bytes into text to put it in standard form
text = contents.decode( 'UTF-8' )

# Clean up text (remove leading and trailing whitespace)
text = text.strip()

# Split text into a list of lines
lines = text.split( '\n' )    # \n is python's end of line marker
print( lines )

# Let's convert lines to data.
# Use brackets when you are trying to build a list, 0 when you want a sum
# of numbers, and '' when you want to build a string.
dataset = []

# Need to process each line
for line in lines:
    # clean up line (this also removes a trailing \r left by \r\n endings)
    line = line.strip()

    # split line into a list of data elements using commas
    csv_line = line.split( ',' )

    # add another row of data to the dataset
    dataset.append( csv_line )

print( dataset )

# This is a pattern of how to get a dataset out of a csv in a web resource.
# The part that converts lines into the dataset is specifically for csv
# files; the steps before it are the pattern to get information from a
# web page.