''' Purpose: practice getting data from a useful dataset
'''

# get url access capability from cs 1112 module
from url import get_contents

# web folder that holds the course csv datasets
CSV_WEB_FOLDER = "http://www.cs.virginia.edu/~cs1112/datasets/csv/"

# ask the user which dataset they want
reply = input( "Enter name of dataset: " )

# the file name is the reply without its leading and trailing whitespace
file_name = reply.strip()

# the dataset's url is the base folder followed by the file name
link = CSV_WEB_FOLDER + file_name

# fetch the text of the page at that url and trim the whitespace
# off both of its ends
text = get_contents( link ).strip()

# break the text apart at each newline character to get a list of lines
lines = text.split( "\n" )

# build the dataset as a list of rows (a list of lists):
#   - every line of the text becomes one row
#   - the line is stripped, then split at its commas to get its cells
#   - every cell is stripped before it goes into the row
dataset = [
    [ cell.strip() for cell in line.strip().split( "," ) ]
    for line in lines
]

# the first row of the dataset is the header; every row after it
# (row 1 through the last row) is the data
header, data = dataset[ 0 ], dataset[ 1 : ]

# show the header: its label on one line, the header row on the next
print( "header:", header, sep="\n" )

# blank line between the two sections
print()

# show the data the same way: label first, then the list of data rows
print( "data:", data, sep="\n" )