''' Purpose: practice analyzing data from a useful dataset
'''

# import helper library that streamlines dataset acquisition
import url

# Note: if you have problems with header and whitespace, please redownload url.py from the website

##########  get the dataset to be analyzed  ##########

# Location of the best-sellers CSV file
DATASET_SOURCE = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/best-sellers.csv'

# Fetch and parse the dataset through the url helper module
dataset = url.get_and_parse_dataset(DATASET_SOURCE)

# Debugging aid: uncomment to dump the raw dataset
# print( 'dataset: ', dataset )

# Row 0 of the dataset carries the column labels
header = dataset[0]
print("header:", header)

# Every row after the header describes one book
books = dataset[1:]

# Count of book rows (the header row is not included)
nbr_books = len(books)

# Pull the sales figure out of every book row. Each row is unpacked into
# exactly five fields, and the last one (sales, stored as text) is
# converted to an integer.
sales_list = [int(sales) for name, author, language, date, sales in books]

# Grand total of all the individual sales figures
total_sales = sum(sales_list)

# Whole-number average via floor division; the same value is published
# under both names, since summing the list and keeping a running total
# are equivalent
average_sales = total_sales // nbr_books
also_average_sales = average_sales
print(also_average_sales)
print("Average sales:", average_sales)

#########  assist the user ##########

# Ask which column the user cares about and trim surrounding whitespace
reply = input('Enter column of interest: ').strip()

# Normalize the casing: first character upper case, the rest lower case
label = reply.capitalize()

# Look up the position of that label within the header row
# (index works on lists; find is only available on strings)
column = header.index(label)
print(column)

# Gather, for every book, the entry stored in the column of interest
data_cells = [row[column] for row in books]
print(data_cells)

# Find the smallest and largest entries of the chosen column.
#
# The cells are strings, and strings are ordered character by character:
#   - alphabetically, e.g. Cat is less than Dog
#   - lower case letters are greater than upper case ones, e.g. dog > Dog
#   - digits are compared as text too, so "9" would be greater than "100"
# That last point gives wrong extremes for a column of whole numbers, so when
# every cell is integer text the cells are compared by their numeric value.
# The cells themselves are left untouched; only the comparison changes.

def _is_integer_text(cell):
    """Return True when cell is a string that int() can convert."""
    if not isinstance(cell, str):
        return False
    try:
        int(cell)
    except ValueError:
        return False
    return True


# key=int compares by numeric value; key=None keeps the default ordering
cell_order = int if all(_is_integer_text(cell) for cell in data_cells) else None

min_cell = min(data_cells, key=cell_order)
max_cell = max(data_cells, key=cell_order)

print(max_cell)
print(min_cell)