''' Purpose: report how often a key occurs in one column of a web dataset

    Prompts, in order, for
       the name of a dataset in the CSV repository,
       the label of the column of interest,
       the key to look for in that column
    and prints the number of data rows whose entry in that column equals key
'''

# the url module supplies the helper used to fetch the dataset
import url

# base web folder holding the datasets
CSV_REPOSITORY = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/'

# build the link to the requested dataset from the user's reply
name = input( 'Enter the name of a data set: ' ).strip()
link = CSV_REPOSITORY + name

# fetch the csv at link; the result is used below as a list of rows
# (each row itself a list) whose first row is the header
dataset = url.get_dataset( link )

# which column are we examining?
label = input( 'Enter column label for the data set: ' ).strip()

# which value are we counting within that column?
key = input( 'Enter the key: ' ).strip()

print()

# separate the header row from the data rows that follow it
header, table = dataset[ 0 ], dataset[ 1 : ]

# position of the column of interest; list.index() raises ValueError
# when label is not one of the header entries
column_index = header.index( label )

# pull that column's entry out of every data row
column_values = [ row[ column_index ] for row in table ]

# number of entries that equal the key
occurrences = column_values.count( key )

# report the total
print( occurrences )