''' Purpose: for a user-specified dataset, dataset
                 a user-specified column label, label
                 a user-specified column value, key
             count the number of rows whose column value equals key
'''

# get a hold of helpful web functions
import url

# base web folder where the csv datasets are kept
CSV_REPOSITORY = 'http://www.cs.virginia.edu/~cs1112/datasets/csv/'

# ask for the dataset of interest -- whitespace typed around the reply is
# not part of the name, so it is stripped right away
name = input( 'Enter the name of a data set: ' ).strip()

# the web address of a dataset is the base folder followed by its name
# e.g., 'http://www.cs.virginia.edu/~cs1112/datasets/csv/wdii.csv'
link = CSV_REPOSITORY + name

# get_contents() is used when you want to get the webpage contents
# as a big string of what's on the webpage
# get_dataset() is used when you want to convert a csv
# on a webpage into a dataset (list of lists)
# For url problems, think about whether it's a csv vs. just text
# Usually if it's a csv we want to convert it to a dataset using get_dataset()
# like in wdii.csv.
# Otherwise, we can just use get_contents() to get the text on the page like
# in macaronic.txt.

# turn the csv found at link into a dataset; the rest of the program treats
# it as a list of rows with the header row first
dataset = url.get_dataset( link )
# print( 'dataset=', dataset )

# ask which column to examine -- the reply is the column's label, that is,
# its name in the header row
label = input( 'Enter column label for the data set: ' ).strip()

# ask for the key -- the key is just a value in the column of interest whose
# occurrences we are trying to count
# e.g., how many times does "No holiday observed" occur in column "GWBD"?
key = input( 'Enter the key: ' ).strip()

print()

# a dataset without even a header row cannot be searched -- stop with a
# readable message instead of a traceback from dataset[ 0 ]
if not dataset:
    raise SystemExit( 'The data set has no header row: ' + link )

# identify header and data from the dataset
#                 0     1       2      3
# header is, e.g., ['ID','State','GWBD','When']
header = dataset[ 0 ]

# table is all rows past the header -- a list of lists without the first row
table = dataset[ 1 : ]

# get the index i of the dataset column of interest
#
# label names the column we are looking at. We get the index of the label in
# the header and later look at that index in each row to find how many times
# the key occurs in the column. So if key is "No holiday observed", how many
# times is "No holiday observed" in the column "GWBD"?
#
# .find() is used for strings to find the index where something occurs in a
# string.
# .index() is used to find the index of something in a list. It raises a
# ValueError when the thing is not there, which happens here whenever the
# label is mistyped -- report that plainly rather than with a traceback.
#
# For the header ['ID','State','GWBD','When'] the column indexes are
# 0, 1, 2, 3
try:
    i = header.index( label )
except ValueError:
    raise SystemExit( 'The data set has no column labeled ' + repr( label ) ) from None

# print( i )

# get from dataset, a list of values for the column of interest: from each
# data row (every row past the header) take the value at column index i
list_column_values = [ row[ i ] for row in table ]
# print( 'List of Column Values = ', list_column_values )

# count from that list of values, the number of values equalling the key
# NOTE(review): key is always a string; if url.get_dataset() turns numeric
# cells into numbers, a numeric key would never match -- confirm
count_key_in_column = list_column_values.count( key )

# print the total
print( count_key_in_column )