''' Purpose: practice getting data from a useful dataset
'''

# need help to get web data - so import the capability
from urllib.request import urlopen

# define the base folder for course csv datasets
CSV_WEB_FOLDER = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/'

# specify data source: ask the user for the name of the dataset
reply = input( 'Enter name of dataset: ' )

# clean up the reply to get file name
file_name = reply.strip()

# get url link for dataset
link = CSV_WEB_FOLDER + file_name

# get a connection to stream the dataset; the with statement closes the
# connection once the page has been read, even if the read raises an error
with urlopen( link ) as stream :
    # read stream to get the contents of the page (raw bytes)
    content = stream.read()

# decode contents into plain text form
text = content.decode( 'UTF-8' )

# print the text to see what we got
#print( 'dataset as text:', text )

# trim leading and trailing whitespace from the text
text = text.strip()

# break the text into a list of lines, one per csv row
lines = text.split( '\n' )

# uncomment to inspect the intermediate list of lines
#print( 'text as a list of lines:', lines )

# build the dataset: each csv line becomes a row (list) of cleaned-up cells
dataset = []

for line in lines :
    # trim the line, then cut it into cells at the commas
    cells = line.strip().split( ',' )

    # trim every cell and gather the results into a new row
    row = [ cell.strip() for cell in cells ]

    # the finished row joins the dataset
    dataset.append( row )
    #print ( row )

#print( dataset )

# the first row holds the column names; every row after it is data
header = dataset[ 0 ]
data = dataset[ 1 : ]

# show the header: label on one line, the list of column names on the next
print( 'header:', header, sep='\n' )

# show the data rows the same way
print( 'data:', data, sep='\n' )