''' Purpose: practice getting data from a useful dataset
'''

# define the base folder for course csv datasets
CSV_WEB_FOLDER = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/'
# .csv is a file format; the name stands for "comma-separated values"
# these files are giant text files that separate cells with commas and rows with \n

import url
# you need url.py in your CS 1112 folder from now until the end of the course
# we will add new things to it and have you download a new version. please keep this module in your folder

# ask the user which dataset to look at, then leave a blank line
reply = input( 'Enter name of dataset: ' )
print()

# trim surrounding whitespace from the reply to get the file name
# we do NOT call lower() here: some file names contain upper case letters,
# so we trust the user to type the name with the correct capitalization
file_name = reply.strip()

# the dataset's web address is the base folder followed by the file name
link = CSV_WEB_FOLDER + file_name

# get_contents() hands the web file back as one giant string holding the whole dataset
dataset = url.get_contents( link )

# one print: the heading, the big string itself, then a blank line
print( 'dataset ( using get_contents() -> big string )', dataset, '', sep='\n' )

# pause until the user presses Enter, so the big string can be looked over first
reply = input( 'Enter when ready: ' )

# fetch the same web file again, this time with get_dataset()
# now dataset will be a list of lists instead of one big string
# ** get_dataset() also casts numeric cells into numbers for us. isn't that wonderful!
dataset = url.get_dataset( link )

# the whole list of lists prints on one long line
# if we want one row per line, we must print inside a loop
print( "dataset ( using get_dataset() -> list of lists )", dataset, '', sep='\n' )

# split the dataset into its header and its data
#   header -> the first row (position 0); it tells us what each column of data means
#   data   -> every row after the header (position 1 to the end); the actual data in the dataset
# slicing review:
    # [ i : j ] -> grab everything starting at position i and going up to j-1
    # [ : j ]   -> grab everything starting at position 0 and going up to j-1
    # [ i : ]   -> grab everything starting at position i and going to the end
header, data = dataset[ 0 ], dataset[ 1 : ]

# show the header: label, the header row, then a blank line
print( 'header:', header, '', sep='\n' )

# show the data one row per line, which is why we need a loop here
print( 'data:' )
for row in data :
    print( row )

print()
# sum of a numeric dataset:
    # use a for loop and accumulation
    # if you want the sum of a specific row i, summation = sum( dataset[ i ] )
# how complicated would it be to go to a webpage and turn its unstructured data into a dataset? hard
    # we work with .csv files because they already have a structure, and the code used to parse the files relies on that structure