''' Purpose: dataset nuance introduction
'''

# Column names of the dataset, in order.
# When a dataset comes from a file (e.g. downloaded from the web), the header is
# its first row (index 0); here we assume we already know that.
# NOTE: <header> is just a variable name we picked; it is not a Python keyword
# and has no predefined meaning.
header = [
    "Name",      # index 0
    "Author",    # index 1
    "Language",  # index 2
    "Date",      # index 3
    "Sales",     # index 4
]

print("header:", header)

print()

# Look up where the name, date, and sales columns sit inside <header>.
# We assume the column names themselves are already known.
# index() is a list method: header.index( x ) returns the position of the first
# item equal to x (and raises ValueError when x is not in the list).
name_column = header.index("Name")    # name_column = 0
date_column = header.index("Date")    # date_column = 3
sales_column = header.index("Sales")  # sales_column = 4

print("sales column:", sales_column)
print("name  column:", name_column)
print("date  column:", date_column)

print()

# <books> is a small, made-up dataset holding some "book" information.
# It is a list of lists: the outer [] is the dataset itself, and every inner []
# is one book --> hence [ [], [], ... ]
# Each inner list follows the column order of <header>:
#     Name, Author, Language, Date, Sales
books = [
    [ "Alice's Adventures in Wonderland", "Carroll",          "English",  1865,   100000000 ],
    [ "And Then There Were None",         "Christie",         "English",  1939,   100000000 ],
    [ "Dream of the Red Chamber",         "Xueqin",           "Chinese",  1754,   100000000 ],
    [ "Don Quixote",                      "de Cervantes",     "Spanish",  1605,   500000000 ],
    [ "Harry Potter",                     "Rowling",          "English",  1997,   447000000 ],
    [ "The Hobbit",                       "Tolkien",          "English",  1937,   150000000 ],
    [ "The Little Prince",                "de Saint-Exupery", "French",   1943,   150000000 ],
    [ "The Lord of the Rings",            "Tolkien",          "English",  1954,   150000000 ],
    [ "A Tale of Two Cities",             "Dickens",          "English",  1859,   200000000 ],
]  # end of list
# The columns are lined up with extra spaces only to make the table easier to read;
# inside the brackets Python ignores that extra white space.

# Indexing recap:
#   books[r]    --> row r of the dataset (one inner list), e.g.
#       books[0] = [ "Alice's Adventures in Wonderland", "Carroll", "English", 1865, 100000000 ]
#   books[r][c] --> column c of row r, e.g.
#       books[0][0] = "Alice's Adventures in Wonderland"
#       books[0][1] = "Carroll"
#       books[1][1] = "Christie"

print("books:", books)
# printing a whole list of lists in one go is ugly and hard to read ...

print()

# ... so print the dataset one row at a time instead.
# <row> is a variable name we chose; on each pass of the loop it holds the next
# inner list of <books>. An inner list of a dataset is also referred to as a "row".
for row in books:
    print("row:", row)

print()

# Add up the number of copies sold across all the books in <books>.
total = 0  # accumulator: starts at zero before any book has been counted
for book in books:
    # <book> is one row of the <books> dataset
    print(book)

    # <sales_column> was found earlier: it is the index of "Sales" in <header>,
    # so book[ sales_column ] is this book's sales figure
    sold = book[sales_column]
    print("   sold:", sold)

    total += sold  # shorthand for: total = total + sold
    print("total so far:", total)

print("total sold:", total)

# Express the total in billions: 10 ** 9 (10 to the power 9) is 1 billion, and
# dividing by it moves the decimal point 9 places to the left.
billions_of_books = total / 10 ** 9

print("total sold:", billions_of_books, "billion")

print()

# Collect the publication date of every book into its own list.
# With the dates isolated we can later answer questions such as:
#     which book was published first?
#     which book was published last?
# To get there, pull the value in the "Date" column out of each row.

dates = []  # starts empty; one date is added per book
for row in books:
    # <row> is one inner list of <books>
    print(row)

    # <date_column> was found earlier: it is the index of "Date" in <header>,
    # so row[ date_column ] is this book's publication year
    year = row[date_column]
    print("    year:", year)

    # append() adds to the end of the existing list in place; no re-assignment
    # (dates = ...) is needed because lists are mutable = can be changed directly
    dates.append(year)
    print("    dates so far:", dates)
    print()

print("dates:", dates)  # the finished list, after the loop has visited every row

print()

# Find the earliest and latest publication dates.
# min() and max() are built-in functions: given a sequence, they return its
# smallest and largest value respectively.
earliest, latest = min(dates), max(dates)

print("earliest:", earliest)
print("latest  :", latest)

print()

# Work out the average publication date: (sum of the dates) / (number of dates).

# len() tells us how many dates there are in the list <dates>
nbr_of_dates = len(dates)

# sum() is a built-in function that adds up the values of a sequence; a for loop
# with an accumulator would give the same total.
# Every item in the sequence must be a number (int/float):
#     sum( [ 1, 2, 3 ] )   works
#     sum( [ 1, '2', 3 ] ) does not (TypeError: cannot add an int and a str)
date_total = sum(dates)

# // is floor division: the result is rounded down, so the average is a whole year
average_date = date_total // nbr_of_dates

print("average date:", average_date)

print()

# Find which books were published earliest and latest.
# So far we only know the dates, so we work back from the dates to the books:
#   step 1. find the position of each date inside the list <dates>
#   step 2. use that same position as the row index into <books>
# This works because <dates> was built by going through <books> in order and
# appending one date per row, so position i in <dates> belongs to row i in <books>!

# step 1: index() returns the position of the first match inside <dates>
row_earliest = dates.index(earliest)  # <earliest> is the minimum date found earlier
row_latest = dates.index(latest)      # <latest> is the maximum date found earlier

print("row with earliest book:", row_earliest)
print("row with latest book  :", row_latest)

print()

# step 2: the same positions, used on <books>, give back the full rows (lists)
earliest_row = books[row_earliest]  # this is the book that was published first
latest_row = books[row_latest]      # this is the book that was published last

'''
books [                                                                                                      dates [
    [ "Alice's Adventures in Wonderland", "Carroll",          "English",  1865,   100000000 ],                  1865,
    [ "And Then There Were None",         "Christie",         "English",  1939,   100000000 ],                  1939,
    [ "Dream of the Red Chamber",         "Xueqin",           "Chinese",  1754,   100000000 ],                  1754,
    [ "Don Quixote",                      "de Cervantes",     "Spanish",  1605,   500000000 ],   row_earliest   1605, --> earliest_row in books
    [ "Harry Potter",                     "Rowling",          "English",  1997,   447000000 ],   row_latest     1997, --> latest_row in books
    [ "The Hobbit",                       "Tolkien",          "English",  1937,   150000000 ],                  1937,
    [ "The Little Prince",                "de Saint-Exupery", "French",   1943,   150000000 ],                  1943,
    [ "The Lord of the Rings",            "Tolkien",          "English",  1954,   150000000 ],                  1954,
    [ "A Tale of Two Cities",             "Dickens",          "English",  1859,   200000000 ],                  1859,    
]                                                                                                             ]
'''

# show the full rows of the earliest and latest published books
print("info on earliest:", earliest_row)
print("info on latest:  ", latest_row)

print()

# Now show just the names of those two books.
# A row is a list holding Name, Author, etc., so any single column can be read
# by its index, e.g.
#     earliest_row      = [ "Don Quixote", "de Cervantes", "Spanish", 1605, 500000000 ]
#     earliest_row[ 0 ] = "Don Quixote"
#     earliest_row[ 1 ] = "de Cervantes"
name_column = header.index("Name")  # 0, because that is where "Name" sits in <header>

earliest_name = earliest_row[name_column]
latest_name = latest_row[name_column]  # same process

# You have all the tools already! :)

print("name of earliest:", earliest_name)
print("name of latest:  ", latest_name)

'''
'''