#ifndef SELECT_H
#define SELECT_H

// Table/row/column selection over an HTML stream.
//
// A selection is described by a vector<int> used as a stack: the LAST element
// is always the next index to consume.  SPage::Select pops a table number,
// STable::ExtractTableDetails pops a row number, SRow::ExtractColumns pops a
// column number and SCol::ExtractData pops a nested-table number (if any is
// left).  All counters are incremented before they are compared, so the
// indices are 1-based.  Every function returns the literal "nothing" when the
// requested element was not reached.
//
// NOTE(review): this header contains non-inline function definitions and a
// variable definition (cp), so it can only be included from one translation
// unit without causing duplicate symbols at link time.

#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "stdarg.h"

using namespace std;

// Tag kinds reported by tagextractor().  The enumerator order is preserved
// exactly because the numeric values are shared with code outside this file.
enum TAG {
    STYLE, CSTYLE, HTML, TABLE, OTHER, TR, TD, CHTML, CTABLE, CTR, CTD,
    LIST, CLIST, LI, SCRIPT, CSCRIPT, P, CP
};

// Tokenizer entry points and shared state, defined outside this header.
extern TAG tagextractor(fstream &fin);
extern void tagprocessor(TAG &ReturnedTag, fstream &fin, ofstream &sout);
extern TAG ReturnedTag;   // tag most recently returned by tagextractor()
extern char inp;          // character most recently read from the stream
// NOTE(review): the initializer makes this a definition despite 'extern';
// kept as-is because no other definition of cp is visible from here.
extern char *cp = &inp;
extern string buffer;

class SRow;
class SCol;
class STable;

string SDealRow(fstream &sin, ofstream &sout, int &rows,
                vector<int> &selectline, bool Search, int therow);
string SDealCol(fstream &sin, ofstream &sout, int &cols,
                vector<int> &selectline, bool Search, int thecol);

// Table class definition //
//////////////////////////////////////////////////////////////////////
class STable
{
public:
    STable();
    string ExtractTableDetails(fstream &sin, ofstream &sout,
                               vector<int> &selectline, bool Search);
    int tablenum;   // 1-based ordinal assigned by whoever created the table
    bool open;      // cleared once the closing table tag has been seen
    int rows;       // number of row tags seen so far in this table
};

STable::STable()
{
    open = true;
    rows = 0;
    tablenum = 0;
}

// Walks the rows of the table whose opening tag has just been consumed.
//
// sin        - stream positioned just after the opening table tag
// sout       - only forwarded to the row/column helpers
// selectline - selection stack; when Search is true its last element is
//              popped and used as the 1-based row number to look for
// Search     - true when this table is on the selected path
//
// Returns the result of the selected row when it is found, otherwise
// "nothing" once the closing table tag or the end of the stream is reached.
//
// NOTE(review): the loop header and the start of the TR branch were missing
// from the source and have been reconstructed by analogy with
// SRow::ExtractColumns; please confirm against version control.
string STable::ExtractTableDetails(fstream &sin, ofstream &sout,
                                   vector<int> &selectline, bool Search)
{
    int searchrow = 0;
    if (Search && !selectline.empty()) {
        searchrow = selectline.back();
        selectline.pop_back();
    }

    // Testing the result of get() (rather than eof() beforehand) avoids
    // processing a stale character once the stream is exhausted.
    while (open && sin.get(inp)) {
        if (inp != '<')
            continue;

        ReturnedTag = tagextractor(sin);
        if (ReturnedTag == TR) {
            ++rows;
            // A column index must still be pending for the search to descend.
            if (Search && selectline.size() > 0 && rows == searchrow)
                return SDealRow(sin, sout, rows, selectline, true, searchrow);
            // Not the wanted row: consume it without searching.
            SDealRow(sin, sout, rows, selectline, false, searchrow);
        }
        else if (ReturnedTag == CTABLE) {
            open = false;
        }
    }
    return "nothing";
}

// Page class definition //
//////////////////////////////////////////////////////////////////////
class SPage
{
public:
    SPage();
    string Select(fstream &sin, ofstream &sout, vector<int> &selectline);
    int tables;   // number of table tags seen at page level
};

SPage::SPage()
{
    tables = 0;
}

// Rewinds the stream and searches the page for the element described by
// selectline (last element = 1-based table number, then row, then column,
// optionally followed by a nested table/row/column path).
//
// selectline is consumed (popped) as the search descends.  Returns the result
// of the selected table, or "nothing" when selectline is empty, the table is
// not found, or nothing but a table number was supplied.
string SPage::Select(fstream &sin, ofstream &sout, vector<int> &selectline)
{
    tables = 0;

    // Clear any eof/fail state left by a previous pass so the rewind works.
    sin.clear();
    sin.seekg(0, ios::beg);

    // Indexing the last element of an empty vector is undefined behaviour.
    if (selectline.empty())
        return "nothing";

    int table = selectline.back();
    selectline.pop_back();

    while (sin.get(inp)) {
        if (inp != '<')
            continue;

        ReturnedTag = tagextractor(sin);
        if (ReturnedTag == TABLE) {
            STable Tchild;
            ++tables;
            Tchild.tablenum = tables;
            // Tables are only walked while a row index is still pending.
            if (selectline.size() > 0) {
                if (table == tables)
                    return Tchild.ExtractTableDetails(sin, sout, selectline, true);
                else
                    Tchild.ExtractTableDetails(sin, sout, selectline, false);
            }
        }
    }
    return "nothing";
}

// Row class definition //
//////////////////////////////////////////////////////////////////////
class SRow
{
public:
    SRow();
    string ExtractColumns(fstream &sin, ofstream &sout,
                          vector<int> &selectline, bool Search);
    int rownum;   // 1-based ordinal assigned by SDealRow
    int cols;     // number of cell tags seen so far in this row
};

SRow::SRow()
{
    cols = 0;
    rownum = 0;
}

// Walks the cells of the row whose opening tag has just been consumed.
//
// When Search is true the last element of selectline is popped and used as
// the 1-based column number to look for.  Returns the result of the selected
// cell when it is found, otherwise "nothing" once the closing row tag or the
// end of the stream is reached.
string SRow::ExtractColumns(fstream &sin, ofstream &sout,
                            vector<int> &selectline, bool Search)
{
    int searchcol = 0;
    if (Search && !selectline.empty()) {
        searchcol = selectline.back();
        selectline.pop_back();
    }

    while (sin.get(inp)) {
        if (inp != '<')
            continue;

        ReturnedTag = tagextractor(sin);
        if (ReturnedTag == TD) {
            ++cols;
            // selectline may legitimately be empty here: that means the cell
            // itself (not a nested table) is the thing being selected.
            if (Search && cols == searchcol)
                return SDealCol(sin, sout, cols, selectline, true, searchcol);
            // Not the wanted cell: consume it without searching.
            SDealCol(sin, sout, cols, selectline, false, searchcol);
        }
        else if (ReturnedTag == CTR) {
            break;
        }
    }
    return "nothing";
}

// Column class definition //
//////////////////////////////////////////////////////////////////////
class SCol
{
public:
    SCol();
    string ExtractData(fstream &sin, ofstream &sout,
                       vector<int> &selectline, bool Search);
    int colnum;   // 1-based ordinal assigned by SDealCol
    int tables;   // number of nested table tags seen inside this cell
};

SCol::SCol()
{
    colnum = 0;
    tables = 0;
}

// Processes the cell whose opening tag has just been consumed.
//
// When Search is true and selectline is empty (after popping a pending
// nested-table number, if there was one) the cell's text is returned: leading
// spaces and tags are skipped, then characters are collected up to the next
// '<', dropping CR and LF.  Otherwise the cell is scanned to its closing tag;
// script and style tags are handed to tagprocessor() and nested tables are
// walked, descending into the one whose 1-based number was popped.
//
// Returns the extracted text, the result of the selected nested table, or
// "nothing".
string SCol::ExtractData(fstream &sin, ofstream &sout,
                         vector<int> &selectline, bool Search)
{
    string temp;
    int table = 0;

    if (Search && !selectline.empty()) {
        table = selectline.back();
        selectline.pop_back();
    }

    if (Search && selectline.empty()) {
        // Skip leading blanks and any tags that precede the cell text.
        // Every read is checked so a truncated document cannot loop forever.
        if (!sin.get(inp))
            return temp;
        while (inp == ' ' || inp == '<') {
            if (inp == '<')
                ReturnedTag = tagextractor(sin);
            if (!sin.get(inp))
                return temp;
        }

        // Collect the text up to the next tag, dropping line breaks.
        while (inp != '<') {
            if (inp != '\n' && inp != '\r')
                temp += inp;   // append the single character, not a C string
            if (!sin.get(inp))
                break;
        }
        return temp;
    }

    while (sin.get(inp)) {
        if (inp != '<')
            continue;

        ReturnedTag = tagextractor(sin);
        if (ReturnedTag == SCRIPT || ReturnedTag == STYLE) {
            // Throw away script / style sheet entries.
            tagprocessor(ReturnedTag, sin, sout);
            ReturnedTag = tagextractor(sin);
        }
        else if (ReturnedTag == TABLE) {
            STable CTable;
            ++tables;
            CTable.tablenum = tables;
            if (Search) {
                if (selectline.size() > 0) {
                    if (table == tables)
                        return CTable.ExtractTableDetails(sin, sout, selectline, true);
                }
                else {
                    cerr << "error requesting a table!";
                }
            }
            // Not the wanted nested table: consume it without searching.
            CTable.ExtractTableDetails(sin, sout, selectline, false);
        }
        else if (ReturnedTag == CTD) {
            break;
        }
        // TD, OTHER and every remaining tag kind are ignored inside a cell.
    }
    return "nothing";
}

// Creates the SRow for row number `rows` and lets it process its cells.
// `therow` is accepted for interface compatibility and is not used.
string SDealRow(fstream &sin, ofstream &sout, int &rows,
                vector<int> &selectline, bool Search, int therow)
{
    (void)therow;
    SRow newrow;
    newrow.rownum = rows;
    return newrow.ExtractColumns(sin, sout, selectline, Search);
}

// Creates the SCol for column number `cols` and lets it process its content.
// `thecol` is accepted for interface compatibility and is not used.
string SDealCol(fstream &sin, ofstream &sout, int &cols,
                vector<int> &selectline, bool Search, int thecol)
{
    (void)thecol;
    SCol newcol;
    newcol.colnum = cols;
    return newcol.ExtractData(sin, sout, selectline, Search);
}

#endif // SELECT_H