''' Purpose: practice getting data from a useful dataset '''

import csv

# define the base folder for course csv datasets
CSV_WEB_FOLDER = "http://www.cs.virginia.edu/~cs1112/datasets/csv/"


def parse_csv(text):
    ''' Turn raw CSV text into a dataset: a list of rows, each a list of cells.

    text: the full contents of a CSV page as one string.

    Returns a list of lists of strings. Every line and every cell has its
    surrounding whitespace removed, and blank lines are skipped, so empty
    text gives an empty dataset. A cell wrapped in double quotes may contain
    commas; the wrapping quotes are not part of the resulting cell.
    '''
    dataset = []

    # strip first so leading/trailing blank lines never become rows
    for line in text.strip().split("\n"):
        # also removes the "\r" left behind by Windows-style line endings
        line = line.strip()
        if not line:
            continue

        # Parse one line at a time: a stray, unclosed quote can then only
        # affect its own row instead of swallowing the rows that follow.
        # skipinitialspace lets a quoted cell follow ", " (comma + space).
        cells = next(csv.reader([line], skipinitialspace=True))

        # clean up each cell and add the finished row to the dataset
        dataset.append([cell.strip() for cell in cells])

    return dataset


def split_header(dataset):
    ''' Decompose a dataset into its header row and its data rows.

    dataset: a list of rows, as produced by parse_csv().

    Returns a (header, data) pair: header is the first row and data is every
    row after it. An empty dataset gives ([], []) rather than an IndexError.
    '''
    if not dataset:
        return [], []

    # [1:] is everything from row 1 on
    return dataset[0], dataset[1:]


def main():
    ''' Ask for a dataset name, fetch it, and print its header and data. '''
    # get url access capability from cs 1112 module; imported here (not at
    # the top of the file) so parse_csv() and split_header() can be imported
    # without the course module being available
    from url import get_contents

    # get name of the dataset and clean up the reply to get the file name
    reply = input("Enter name of dataset: ")
    file_name = reply.strip()

    # get url link for dataset
    link = CSV_WEB_FOLDER + file_name

    # get the text version of the contents of the page
    text = get_contents(link)

    # turn the text into a dataset and decompose it into header and data
    dataset = parse_csv(text)
    header, data = split_header(dataset)

    # print the header
    print("header:")
    print(header)
    print()

    # print the dataset data
    print("data:")
    print(data)


# run the practice program only when this file is executed directly
if __name__ == "__main__":
    main()