''' Purpose: dataset nuance introduction
'''

# REVIEW CONCEPTS IN THIS MODULE - once you get it, it's pretty straightforward

# Dataset header: the column titles for the table of books defined later in
# this file.  The position of a title in this list is the position of that
# piece of information inside every book row.
header = ["Name", "Author", "Language", "Date", "Sales"]

print("header:", header)
print()

# Locate the columns we will need later on.
#   some_list.index(item) --> index of the FIRST element equal to item
#                             (the match must be exact)
#   find() exists only for strings; lists use index()
#
# Keep the two directions straight:
#   some_list[i]          --> takes an INT index, hands back the ELEMENT
#   some_list.index(item) --> takes the ELEMENT, hands back its INT index
name_column = header.index("Name")
date_column = header.index("Date")
sales_column = header.index("Sales")

print("sales column:", sales_column)
print("name  column:", name_column)
print("date  column:", date_column)
print()


# everything between a pair of triple quotes is just a string, not code, so Python will not run it

# The dataset itself: a list of rows, one row (sublist) per book.
# Every row follows the column order laid out in header, and a single row
# can mix types -- strings for the text fields, ints for year and sales.
books = [
    ["Alice's Adventures in Wonderland", "Carroll", "English", 1865, 100_000_000],
    ["And Then There Were None", "Christie", "English", 1939, 100_000_000],
    ["Dream of the Red Chamber", "Xueqin", "Chinese", 1754, 100_000_000],
    ["Don Quixote", "de Cervantes", "Spanish", 1605, 500_000_000],
    ["Harry Potter", "Rowling", "English", 1997, 447_000_000],
    ["The Hobbit", "Tolkien", "English", 1937, 150_000_000],
    ["The Little Prince", "de Saint-Exupery", "French", 1943, 150_000_000],
    ["The Lord of the Rings", "Tolkien", "English", 1954, 150_000_000],
    ["A Tale of Two Cities", "Dickens", "English", 1859, 200_000_000],
]

# Printing the whole dataset in one go works, but everything lands on 1 line.
print("books:", books)
print()

# One row per line is much easier to read and makes the table structure
# (rows of books, columns of info) visible.
for record in books:
    print("row:", record)

print()

# Total copies sold across every book in the dataset.
# Accumulator pattern: start at zero, then fold each row's sales figure into
# the running total.  sales_column (looked up earlier from header) says which
# position inside a row holds the number of copies sold.
total = 0
for entry in books:
    total += entry[sales_column]

print("total sold:", total)   # final value of the accumulator
print()

# Gather the publication date of every book into a list of its own, so the
# list can be handed to min() and max() to get the earliest and latest date.
# List accumulator: begin with an empty list, then append one date per row,
# reading it from the date column found earlier.
dates = []
for entry in books:
    dates.append(entry[date_column])

print("dates:", dates)
print()

# Earliest and latest publication dates.
# min() / max() take a list as their argument and return its smallest /
# largest value respectively.
earliest, latest = min(dates), max(dates)

print("earliest:", earliest)
print("latest  :", latest)
print()

# Average publication date = total of all the dates / how many dates there are.
#   len( x ) --> how many items the list holds
#   sum( x ) --> built-in function: pass in a list of numbers, get back
#                their sum/total
nbr_of_dates = len(dates)
date_total = sum(dates)

# // is floor division, so with these integer dates the result is an integer.
average_date = date_total // nbr_of_dates

print("average date:", average_date)
print()

# Which books are the earliest and latest published?
# dates was filled by walking books from top to bottom, so position i in dates
# belongs to row i of books.  Finding where the earliest / latest date sits in
# dates therefore gives the row number of the matching book.
# (index() reports the first match, so a tie goes to the row listed first.)
row_latest = dates.index(latest)
row_earliest = dates.index(earliest)

print("row with earliest book:", row_earliest)
print("row with latest book  :", row_latest)
print()

# Positions in dates line up with row numbers in books, so the two indices
# can be used directly to fetch the complete row for each of the two books.
earliest_row, latest_row = books[row_earliest], books[row_latest]

# Show everything we know about those two books.
print("info on earliest:", earliest_row)
print("info on latest:  ", latest_row)
print()

# Finally, show only the names of those two books.
# Look up which column of header holds the name ...
name_column = header.index("Name")

# ... then use it on each fetched row.  A row is the entire record for one
# book; indexing it with name_column picks out just the name.
latest_name = latest_row[name_column]
earliest_name = earliest_row[name_column]

print("name of earliest:", earliest_name)
print("name of latest:  ", latest_name)

'''             <---- will move down to reveal more of the program
'''