"""Purpose: dataset nuance introduction.

Walks through a small "list of lists" dataset of best-selling books:
locating columns by name with ``list.index``, accumulating a sales total,
collecting a column into its own list, and using ``min``/``max``/``sum``/``len``
on that list to find the earliest, latest, and average publication date.

Everything runs at module level, so running (or importing) this file prints
each intermediate result.
"""

# REVIEW CONCEPTS IN THIS MODULE - once you get it, it's pretty straightforward

# define and print dataset header
# these are the names of the columns for the table of books below
header = ["Name", "Author", "Language", "Date", "Sales"]
print("header:", header)
print()

# determine and print the index of the name, sales, and date columns
# some_list.index(thing) --> the index at which thing FIRST occurs in
# some_list; the argument must exactly match an element of the list
# find() is for strings only; use index() for lists
sales_column = header.index("Sales")  # position of 'Sales' in header
name_column = header.index("Name")  # position of 'Name' in header
date_column = header.index("Date")  # position of 'Date' in header

# subscript operator [] --> takes an INT index, gives back the ELEMENT there
# index() or find()     --> takes the ELEMENT, gives back the INDEX at which
#                           it first occurs
print("sales column:", sales_column)
print("name column:", name_column)
print("date column:", date_column)
print()

# NOTE: a triple-quoted string used as a bare statement is evaluated and
# thrown away, so everything between triple quotes is effectively ignored

# define dataset
# each sublist is one book; each position in a sublist is one piece of info
# about that book, ordered to match the names in the list header
# a list can hold multiple types of information: strings, numbers, lists, etc
books = [
    ["Alice's Adventures in Wonderland", "Carroll", "English", 1865, 100000000],
    ["And Then There Were None", "Christie", "English", 1939, 100000000],
    ["Dream of the Red Chamber", "Xueqin", "Chinese", 1754, 100000000],
    ["Don Quixote", "de Cervantes", "Spanish", 1605, 500000000],
    ["Harry Potter", "Rowling", "English", 1997, 447000000],
    ["The Hobbit", "Tolkien", "English", 1937, 150000000],
    ["The Little Prince", "de Saint-Exupery", "French", 1943, 150000000],
    ["The Lord of the Rings", "Tolkien", "English", 1954, 150000000],
    ["A Tale of Two Cities", "Dickens", "English", 1859, 200000000],
]
# printing a whole dataset is not pretty: everything lands on one line
print("books:", books)
print()

# print the rows of the dataset so that each row is on its own line
# this loop helps us visually process the dataset structure and information
for row in books:
    print("row:", row)
print()

# determine the total number of books sold across the whole dataset
# (explicit accumulator pattern: start at 0, add one row's sales at a time)
total = 0
for book in books:
    # sales_column was determined above, so this picks the sales figure
    sold = book[sales_column]
    total += sold  # accumulate
print("total sold:", total)  # final value of the accumulator
print()

# build a list of the book publication dates
# that list can then be handed to min() and max() to find the earliest and
# latest publication date respectively
dates = []  # building a list, so start with an empty list
for row in books:
    year = row[date_column]  # date_column was determined above
    dates.append(year)  # grow the list accumulator
print("dates:", dates)
print()

# determine earliest and latest publication date
# min() and max() --> take a list, return its smallest / largest value
earliest = min(dates)
latest = max(dates)
print("earliest:", earliest)
print("latest :", latest)
print()

# determine average publication date
# sum(x) is a built-in: pass in a list of numbers, get back their total
date_total = sum(dates)
nbr_of_dates = len(dates)  # how many dates there are
# floor division keeps the average a whole year (int), not a float
average_date = date_total // nbr_of_dates
print("average date:", average_date)
print()

# determine earliest and latest published books
# first find the positions of those dates in the dates list; dates was built
# in the same order as books, so the same positions are row indices into books
row_earliest = dates.index(earliest)  # index of the earliest date
row_latest = dates.index(latest)  # index of the latest date
print("row with earliest book:", row_earliest)
print("row with latest book :", row_latest)
print()

# use those indices to look up the corresponding rows in the books dataset
earliest_row = books[row_earliest]  # book with the earliest publication date
latest_row = books[row_latest]  # book with the latest publication date

# print those rows
print("info on earliest:", earliest_row)
print("info on latest: ", latest_row)
print()

# print just the names of those books
# name_column was already computed near the top; header has not changed, so
# it is reused here rather than looked up a second time
earliest_name = earliest_row[name_column]  # pick the name out of the full row
latest_name = latest_row[name_column]  # same process as above
print("name of earliest:", earliest_name)
print("name of latest: ", latest_name)

# <---- teaching marker: the triple quote that hides the rest of the program
#       sits here and is moved down to reveal more of it