''' Purpose: practice analyzing data from a useful dataset '''

# import helper library that handles dataset acquisition
import url
# Note: if you have problems with header and whitespace, please redownload
# url.py from the website

########## get the dataset to be analyzed ##########

# specify data source
DATASET_SOURCE = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/best-sellers.csv'

# get data for the dataset
dataset = url.get_and_parse_dataset( DATASET_SOURCE )

# A usable dataset needs a header row plus at least one book row. Stopping
# here with a clear message avoids an IndexError on dataset[ 0 ], a division
# by zero in the averages, and min()/max() of an empty list further down.
if len( dataset ) < 2 :
    raise SystemExit( 'Dataset has no book rows to analyze.' )

# determine dataset header
header = dataset[ 0 ]       # first line of dataset is just the header
print( "header:", header )

# determine books of the dataset
books = dataset[ 1 : ]      # excludes first row (i.e. the header)

# determine number of books
nbr_books = len( books )    # number of rows in dataset books

########## compute the average sales ##########

total_sales = 0             # integer accumulator for the running total
sales_list = []             # list accumulator holding each sales figure

for book in books :
    # each row is unpacked into exactly these five cells
    name, author, language, date, sales = book

    # cells arrive as strings; convert before doing arithmetic
    sales = int( sales )

    total_sales = total_sales + sales
    sales_list.append( sales )

# Two equivalent ways to get the (integer) average -- either is fine
average_sales = total_sales // nbr_books
also_average_sales = sum( sales_list ) // nbr_books

print( also_average_sales )
print( "Average sales:", average_sales )

########## assist the user ##########

# get the user's interest in the dataset
reply = input( 'Enter column of interest: ' )

# clean up input
reply = reply.strip()
label = reply.capitalize()

# determine the column number for the label they are interested in
if label in header :
    column = header.index( label )   # index is for lists; find is only for strings
else :
    # capitalize() lowercases everything after the first character, so a
    # header label with interior capitals can never equal label. Fall back
    # to comparing without regard to case before giving up.
    wanted = reply.casefold()
    matches = [ i for i, title in enumerate( header )
                if str( title ).casefold() == wanted ]

    if not matches :
        choices = ', '.join( str( title ) for title in header )
        raise SystemExit( 'Unknown column: ' + reply
                          + ' (available columns: ' + choices + ')' )

    column = matches[ 0 ]

print( column )

# get the dataset elements for that column
data_cells = [ book[ column ] for book in books ]
print( data_cells )

# Get min and max data entries from the column.
# Note: the cells are compared as strings, so they are ordered alphabetically
#   e.g. Cat is less than Dog
# and lower case letters are greater than upper case letters
#   e.g. dog > Dog
# This also applies to a column of numerals: '9' compares greater than '100'.
min_cell = min( data_cells )
max_cell = max( data_cells )

print( max_cell )
print( min_cell )