# need access to python web resources
import urllib.request

# specify the location of the data repository
BASE_URL = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/'

# get the name of the dataset
reply = input( 'Enter the name of a dataset: ' )

# remove any leading or trailing whitespace the user typed
name = reply.strip()

# Specify where in the repository we can find the dataset
web_file = BASE_URL + name

print( web_file )

# Connect the program to the web resource. The with statement closes the
# connection as soon as we leave the indented block, even if reading fails,
# so the program never leaves a network connection open by accident
with urllib.request.urlopen( web_file ) as stream:

    # read the web resource, which is a stream of data (raw bytes)
    contents = stream.read()

# Decode the bytes into text to put it in standard form
text = contents.decode( 'UTF-8' )

# Clean up text
text = text.strip()

# Split text into a list of lines
lines = text.split( '\n' ) # \n is python's end of line marker

print( lines )

# Let's convert lines to data.
# Reminder: start with [] when you are building a list, 0 when you want a sum
# of numbers, and '' when you are building a string.
# For each line: clean it up with strip(), then split it on commas into a list
# of data elements; each of those lists is one row of the dataset
dataset = [ line.strip().split( ',' ) for line in lines ]

print( dataset )

# This is a pattern of how to get a dataset out of a csv in a web resource.
#  The step that builds dataset (stripping each line and splitting it on
#  commas) is specifically for csv files; everything before that step is the
#  general pattern to get information from a web page