//import cs201j.*; import java.net.*; import java.io.*; import java.util.Vector; import java.util.Enumeration; public class GrabNames { // // Produces a file of names and popularity from the http://www.ssa.gov/OACT/babynames/index.html data // public static void main (/*@non_null@*/ String args[]) throws RuntimeException { if (args.length != 1 && args.length != 3) { System.err.println ("Usage: java GrabNames "); System.exit (1); } String url = args[0]; try { StructuredReader reader = new StructuredReader (new URL (url).openStream ()); int lineno = 1; StringTable mnames = new StringTable (); StringTable fnames = new StringTable (); // Ad-hoc parser for extracting name information from web pages. // // The name lines start with n // String linePrefix = ""; int listno = 0; // Find total number - because of line spacing this is a bit shaky... reader.readThroughString ("males in the sample is "); int nmales = reader.readCommaInt (); Assert.check (nmales > 0); System.err.println ("Sample has " + nmales + " males."); reader.readThroughString ("is "); int nfemales = reader.readCommaInt (); Assert.check (nfemales > 0); System.err.println ("Sample has " + nfemales + " females."); while (true) { try { reader.readThroughString (linePrefix); int readno = reader.readInt (); listno++; if (readno != listno) { System.err.println ("Error: line mismatch " + readno + " / " + listno); } reader.readThroughString (""); String mname = reader.readThroughAny ("<"); if (mname.length () == 0) { // No male name on this line (but, there may be more female names) reader.readThroughString ("br>
"); } else { reader.readUntilAny ("0123456789"); int mcount = reader.readInt (); reader.readThroughString (""); // System.err.println ("Line " + readno + ": M " + mname + " [" + mcount + "]"); try { mnames.addName (mname, ((double) mcount / (double) nmales)); } catch (DuplicateEntryException e) { System.err.println ("Duplicate name: " + mname); } } String fname = reader.readThroughAny ("<"); if (fname.length () == 0) { // No female name on this line (but, there may be more male names) reader.readThroughString ("br>
"); } else { reader.readUntilAny ("0123456789"); int fcount = reader.readInt (); // System.err.println ("Line " + readno + ": F " + fname + " [" + fcount + "]"); fnames.addName (fname, ((double) fcount / (double) nfemales)); } } catch (EOFException e) { // Okay, this is normal break; } catch (IOException e) { System.err.println ("Error: " + e); } catch (DuplicateEntryException e) { System.err.println ("Duplicate name: " + e); } catch (NoNumberException e) { System.err.println ("No number: " + e); } } if (args.length == 3) { PrintWriter out; try { out = new PrintWriter (new FileOutputStream (args[1])); out.println (mnames.toString ()); out.close (); } catch (IOException e) { System.err.println ("Error: cannot write output file " + args[1] + ": "+ e); } try { out = new PrintWriter (new FileOutputStream (args[2])); out.println (fnames.toString ()); out.close (); } catch (IOException e) { System.err.println ("Error: cannot write output file " + args[2] + ": "+ e); } } else { System.err.println (mnames.toString ()); System.err.println (fnames.toString ()); } } catch (MalformedURLException e) { System.err.println ("Error: " + e); } catch (IOException e) { System.err.println ("Error: " + e); } catch (NoNumberException e) { System.err.println ("Malformed file: " + e); } } }