/** File: DataRowStatisticsIndexMap.java */
/**
 * Class DataRowStatisticsIndexMap is used to add statistics on the data to
 * the IndexMap, which can then be saved as a .sidx file.
 * The ftIM IndexMap is read from the .idx Index Map file that has the same
 * base name as the data file.
 * [TODO] add discussion on which data can be used.
 *
 * List of Methods
 * ===================
 * DataRowStatisticsIndexMap() - Constructor
 * setIndexMapTable() - set the IndexMap FileTable ftIM to use
 * setDataTable() - set the data FileTable ft to use if any
 * setTablePrecision() - set number of digits in output statistics
 * createStatisticsIndexMapFile() - create & write Statistics Index Map file
 * readIndexMapFile() - read the IndexMap FileTable to ftIM.
 * readDataFileHeader() - read the data file FileTable to ft.
 * computeRowStatistics() - compute row statistics of data (max,min,mean,stddev)
 * computeGlobalStatistics() - compute global stats of (max,min,mean,stddev) data
 * addRowStatisticsToIndexMap() - add the row stats to the IndexMap ftIM table.
 * addGlobalStatisticsToIndexMapHdr() - add 2 header rows hdr[0:1] for global stats.
 * writeStatisticsIndexMap() - write extended IndexMap w/statistics as .sidx file.
 *
 *
*

* This code is available at the HTMLtools project on SourceForge at * http://htmltools.sourceforge.net/ * under the "Common Public License Version 1.0" * * http://www.opensource.org/licenses/cpl1.0.php.

*

* It was derived and refactored from the open source * MAExplorer (http://maexplorer.sourceforge.net/), and * Open2Dprot (http://Open2Dprot.sourceforge.net/) Table modules. *

* $Date: 2009/09/15 11:45:56 $ $Revision: 1.34 $ *
* Copyright 2008, 2009 by Peter Lemkin * E-Mail: lemkin@users.sourceforge.net * http://lemkingroup.com/ *
*/
public class DataRowStatisticsIndexMap extends Thread
{ /* DataRowStatisticsIndexMap */

  /** Link to the converter instance supplied to the constructor. */
  private HTMLtools cvt;

  /** FileTable holding the IndexMap. */
  private FileTable ftIM;

  /** FileTable holding the Data table. */
  private FileTable ft;

  /* ----------- File name paths ----- */

  /**
   * Full path of the Table data file (the constructor documents this as
   * the data .txt file). The entire data file is not read into Table ft;
   * instead (all) rows are random accessed based on the indexMap
   * Table ftIM.
   */
  private String dataFilePath;

  /**
   * Full path of the Table Index Map file (.idx extension).
   * This table is read into ftIM. It is only read, never written.
   */
  private String indexMapFilePath;

  /**
   * Full path of the Statistics Index Map file (.sidx extension).
   * This is the expanded ftIM table after the row and global
   * statistics have been added. It is only written, never read.
   */
  private String statIndexMapFilePath;

  /** Number of digits in output statistics for the .sidx Table. */
  private int precision;

  /* ----------- local -dropColumn list for speed in testing ----- */

  /**
   * Columns of FileTable ft that are in the -dropColumn list.
   * The number of valid entries is nDropListCols.
   */
  private int[] dropListCols;

  /**
   * Size of the dropListCols[] list of columns in FileTable ft that are
   * in the -dropColumn list.
   */
  private int nDropListCols;

  /* ----------- ftIM Table column indexes (-1 until looked up) ----- */

  /** Index-map keyword index column for "StartByte". */
  private int idxStartByte= -1;

  /** Index-map keyword index column for "EndByte". */
  private int idxEndByte= -1;

  /** Extended ftIM keyword index column for row "Min" data. */
  private int idxMinRow= -1;

  /** Extended ftIM keyword index column for row "Max" data. */
  private int idxMaxRow= -1;

  /** Extended ftIM keyword index column for row "Mean" data. */
  private int idxMeanRow= -1;

  /** Extended ftIM keyword index column for row "StdDev" data. */
  private int idxStdDevRow= -1;

  /* ----------- Table sizes and computed row statistics ----- */

  /** Number of data columns in the data Table including the drop columns. */
  private int nDcols;

  /**
   * Number of data columns in the data Table NOT including the drop
   * columns. - computed
   */
  private int nD2cols;

  /** Number of data rows in the index map Table. */
  private int nIMrows;

  /** Computed row data min values from the data Tables. */
  private float[] minRowVal;

  /** Computed row data max values from the data Tables. */
  private float[] maxRowVal;

  /** Computed row data mean values from the data Tables. */
  private float[] meanRowVal;

  /** Computed row data stdDev values from the data Tables.
*/ private float stdDevRowVal[]= null; /** Global min value from data tables */ private float glbMinRowVal= 0.0F; /** Global max value from data tables */ private float glbMaxRowVal= 0.0F; /** Global mean value from data tables */ private float glbMeanRowVal= 0.0F; /** Global stdDev value from data tables */ private float glbStdDevRowVal= 0.0F; /** Global min value from data tables */ private double glbMinRowValSum= 0.0; /** Global min value from data tables */ private double glbMaxRowValSum= 0.0; /** Global min value from data tables */ private double glbMeanRowValSum= 0.0; /** Global min value from data tables */ private double glbStdDevRowValSum= 0.0; /** * DataRowStatisticsIndexMap() - Constructor * @param cvt is an instance of converter * @param dataFilePath is the data .txt file that has an associated * .idx IndexMap file with the same base name. The .sidx * file to be created has the same base name */ public DataRowStatisticsIndexMap(HTMLtools cvt, String dataFilePath) { /* DataRowStatisticsIndexMap */ this.cvt= cvt; this.dataFilePath= dataFilePath; /* compute the IndexMap and Statistics Index Map file path names. */ String dfPath= dataFilePath; this.indexMapFilePath= dfPath.substring(0,dfPath.length()-4)+".idx"; this.statIndexMapFilePath= dfPath.substring(0,dfPath.length()-4)+".sidx"; /* Setup the local -dropColumns list */ nDropListCols= -1; dropListCols= null; } /* DataRowStatisticsIndexMap */ /** * setIndexMapTable() - set the IndexMap FileTable ftIM to use * if it is already in memory. */ public void setIndexMapTable(FileTable ftIM) { this.ftIM= ftIM; } /** * setDataTable() - set the data FileTable ft to use if any * if it is already in memory. */ public void setDataTable(FileTable ft) { this.ft= ft; } /** * setTablePrecision() - set number of digits in output statistics. * default is 2. 
*/ public void setTablePrecision(int precision) { if(precision<=0) precision= 2; this.precision= precision; } /** * createStatisticsIndexMapFile() - create & write Statistics Index Map file * @return true if succeed */ public boolean createStatisticsIndexMapFile() { /* createStatisticsIndexMapFile */ /* [1] Read the IndexMap FileTable to ftIM. */ if(!readIndexMapFile()) return(false); /* [2] Read the data file FileTable to ft. */ if(!readDataFileHeader()) return(false); /* [3] compute row statistics of data (max,min,mean,stddev) */ if(!computeRowStatistics()) return(false); /* [4] compute global stats of (max,min,mean,stddev) data */ if(!computeGlobalStatistics()) return(false); /* [5] add the row stats to the IndexMap ftIM table. */ if(!addRowStatisticsToIndexMap()) return(false); /* [6] Add 2nd header row hdr[0] for global stats. */ if(!addGlobalStatisticsToIndexMapHdr()) return(false); /* [7] write extended IndexMap w/statistics as .sidx file. * Note that ftIM has been extended in step [6]. */ if(!writeStatisticsIndexMap(ftIM)) return(false); return(true); } /* createStatisticsIndexMapFile */ /** * readIndexMapFile() - read the IndexMap FileTable to ftIM. * This can be used if the IndexMap file has not been read yet. * @return true if succeed with (ftIM, idxStartByte, idxEndByte) * set up. */ public boolean readIndexMapFile() { /* readIndexMapFile */ ftIM= new FileTable("Index-Map-Table"); if(!ftIM.readAndParseTable(indexMapFilePath)) { UtilCM.logMsg("Problem reading Index Map file '"+ indexMapFilePath+"' \n "+ftIM.errMsgLog); return(false); } /* Clean (remove) header enclosing whitespace */ ftIM.trimTableEnclWhitespace(true,false); /* Setup index-map keyword index column values */ idxStartByte= ftIM.lookupFieldIdx("StartByte"); idxEndByte= ftIM.lookupFieldIdx("EndByte"); return(true); } /* readIndexMapFile */ /** * readDataFileHeader() - read the data file FileTable to ft. * @return true if succeed with ft set up. 
*/ public boolean readDataFileHeader() { /* readDataFileHeader */ ft= new FileTable("Index-Map-Table"); if(!ft.readAndParseTableFields(dataFilePath)) { UtilCM.logMsg("Problem reading data file Table '"+ dataFilePath+"' \n "+ft.errMsgLog); return(false); } /* Clean (remove) header enclosing whitespace */ ft.trimTableEnclWhitespace(true,false); /* Setup the local -dropColumns list */ nDropListCols=0; dropListCols= null; if(cvt.nDropColNames>0) { /* make local drop list */ String tFields[]= ft.tFields; dropListCols= new int[cvt.nDropColNames]; /* worst case */ for(int i=0;i * Hdr[0] has names * ("Global Min", "Global Max", "Global Mean","Global StdDev") * Hdr[0] has global values * ( glbMinRowVal, glbMaxRowVal, glbMeanRowVal, glbStdDevRowVal) * * @return true if succeed */ public boolean addGlobalStatisticsToIndexMapHdr() { /* addGlobalStatisticsToIndexMapHdr */ /* [1] Compute the new header arrays to use */ int nIMcols= ftIM.tCols; String hdrNames[][]= new String[3][]; for(int k=0;k<3;k++) hdrNames[k]= new String[nIMcols]; for(int c=0;c