/** File: DataRowStatisticsIndexMap.java */
/**
 * Class DataRowStatisticsIndexMap is used to add statistics on the data to
 * the IndexMap, which can then be saved as a .sidx file.
 * The ftIM IndexMap is read from the .idx IndexMap file that has the same
 * base name as the data file.
 * [TODO] add discussion on which data can be used.
 *
* List of Methods*
* ===================
 * DataRowStatisticsIndexMap() - Constructor
 * setIndexMapTable() - set the IndexMap FileTable ftIM to use
 * setDataTable() - set the data FileTable ft to use if any
 * setTablePrecision() - set number of digits in output statistics
 * createStatisticsIndexMapFile() - create & write Statistics Index Map file
 * readIndexMapFile() - read the IndexMap FileTable to ftIM.
 * readDataFileHeader() - read the data file FileTable to ft.
 * computeRowStatistics() - compute row statistics of data (max,min,mean,stddev)
 * computeGlobalStatistics() - compute global stats of (max,min,mean,stddev) data
 * addRowStatisticsToIndexMap() - add the row stats to the IndexMap ftIM table.
 * addGlobalStatisticsToIndexMapHdr() - add 2 header rows hdr[0:1] for global stats.
 * writeStatisticsIndexMap() - write extended IndexMap w/statistics as .sidx file.
 *
 *
* This code is available at the HTMLtools project on SourceForge at * http://htmltools.sourceforge.net/ * under the "Common Public License Version 1.0" * * http://www.opensource.org/licenses/cpl1.0.php.
** It was derived and refactored from the open source * MAExplorer (http://maexplorer.sourceforge.net/), and * Open2Dprot (http://Open2Dprot.sourceforge.net/) Table modules. *
* $Date: 2009/09/15 11:45:56 $ $Revision: 1.34 $
*
* Copyright 2008, 2009 by Peter Lemkin
* E-Mail: lemkin@users.sourceforge.net
* http://lemkingroup.com/
*
*/
public class DataRowStatisticsIndexMap extends Thread
{ /* DataRowStatisticsIndexMap */
/** Converter link. Assigned by the constructor; readDataFileHeader()
 * reads cvt.nDropColNames from it.
 */
private HTMLtools
cvt= null;
/** FileTable of the IndexMap. Assigned by setIndexMapTable() or
 * replaced with a newly read table by readIndexMapFile().
 */
private FileTable
ftIM= null;
/** FileTable of the Data table. Assigned by setDataTable() or
 * replaced with a newly read table by readDataFileHeader().
 */
private FileTable
ft= null;
/* ----------- File name paths ----- */
/** The full path of the Table data file (documented by the constructor
 * as the .txt data file).
 * We do not read the entire data file into Table ft, but
 * rather random access (all) rows based on the indexMap
 * Table ftIM.
 */
private String
dataFilePath= null;
/** The full path of the Table Index Map file (.idx extension).
 * The table is read into ftIM. We do not write the table, but
 * just read it.
 */
private String
indexMapFilePath= null;
/** The full path of the Statistics Index Map file (.sidx extension).
 * The table is the expanded ftIM table after we add the row and global
 * statistics. We do not read the table, but just write it.
 */
private String
statIndexMapFilePath= null;
/** Number of digits in output statistics for .sidx Table.
 * Starts at 0; setTablePrecision() stores 2 for any requested
 * value that is <= 0.
 */
private int
precision= 0;
/* ----------- local -dropColumn list for speed in testing ----- */
/** List of columns in FileTable ft that are in the -dropColumn list.
 * This is of size nDropListCols. readDataFileHeader() allocates it
 * with cvt.nDropColNames entries (worst case) when that count is > 0.
 */
private int
dropListCols[]= null;
/** Size of dropListCols[] list of columns in FileTable ft that are in
 * the -dropColumn list. The constructor sets this to -1 and
 * readDataFileHeader() resets it to 0 before building the list.
 */
private int
nDropListCols= 0;
/* ----------- ftIM Table column indexes ----- */
/* Index-map keyword index column for "StartByte".
 * Set by readIndexMapFile() from ftIM.lookupFieldIdx("StartByte");
 * -1 until then.
 */
private int
idxStartByte= -1;
/* Index-map keyword index column for "EndByte".
 * Set by readIndexMapFile() from ftIM.lookupFieldIdx("EndByte");
 * -1 until then.
 */
private int
idxEndByte= -1;
/* Extended ftIM keyword index column for row "Min" data (-1 until set) */
private int
idxMinRow= -1;
/* Extended ftIM keyword index column for row "Max" data (-1 until set) */
private int
idxMaxRow= -1;
/* Extended ftIM keyword index column for row "Mean" data (-1 until set) */
private int
idxMeanRow= -1;
/* Extended ftIM keyword index column for row "StdDev" data (-1 until set) */
private int
idxStdDevRow= -1;
/* ----------- Table sizes and computed per-row statistics ----- */
/** Number of data columns in the data Table including the drop columns. */
private int
nDcols= 0;
/** Number of data columns in the data Table NOT including the drop columns.
 * - computed */
private int
nD2cols= 0;
/** Number of data rows in the index map Table. */
private int
nIMrows= 0;
/** Computed row data min values from the data Tables
 * (presumably one entry per index map row - TODO confirm).
 */
private float
minRowVal[]= null;
/** Computed row data max values from the data Tables
 * (presumably one entry per index map row - TODO confirm).
 */
private float
maxRowVal[]= null;
/** Computed row data mean values from the data Tables
 * (presumably one entry per index map row - TODO confirm).
 */
private float
meanRowVal[]= null;
/** Computed row data stdDev values from the data Tables
 * (presumably one entry per index map row - TODO confirm).
 */
private float
stdDevRowVal[]= null;
/** Global min value from data tables */
private float
glbMinRowVal= 0.0F;
/** Global max value from data tables */
private float
glbMaxRowVal= 0.0F;
/** Global mean value from data tables */
private float
glbMeanRowVal= 0.0F;
/** Global stdDev value from data tables */
private float
glbStdDevRowVal= 0.0F;
/** Double-precision accumulator associated with the row min values.
 * NOTE(review): the name suggests a running sum of row mins; the code
 * that updates it is not in this part of the file - confirm.
 */
private double
glbMinRowValSum= 0.0;
/** Double-precision accumulator associated with the row max values.
 * NOTE(review): the name suggests a running sum of row maxes; the code
 * that updates it is not in this part of the file - confirm.
 */
private double
glbMaxRowValSum= 0.0;
/** Double-precision accumulator associated with the row mean values.
 * NOTE(review): the name suggests a running sum of row means; the code
 * that updates it is not in this part of the file - confirm.
 */
private double
glbMeanRowValSum= 0.0;
/** Double-precision accumulator associated with the row stdDev values.
 * NOTE(review): the name suggests a running sum of row stdDevs; the code
 * that updates it is not in this part of the file - confirm.
 */
private double
glbStdDevRowValSum= 0.0;
/**
 * DataRowStatisticsIndexMap() - Constructor.
 * Saves the converter and the data file path, then derives the IndexMap
 * (.idx) and Statistics Index Map (.sidx) file paths by replacing the
 * data file's extension.
 * @param cvt is an instance of converter
 * @param dataFilePath is the data .txt file that has an associated
 *        .idx IndexMap file with the same base name. The .sidx
 *        file to be created has the same base name. Must not be null
 *        (a NullPointerException is thrown if it is).
 */
public DataRowStatisticsIndexMap(HTMLtools cvt,
                                 String dataFilePath)
{ /* DataRowStatisticsIndexMap */
  this.cvt= cvt;
  this.dataFilePath= dataFilePath;

  /* Compute the IndexMap and Statistics Index Map file path names.
   * The extension is whatever follows the last '.' in the file name
   * itself, so a '.' inside a directory name is ignored and a path
   * with no extension (or a very short path) is used unchanged as the
   * base instead of losing its last 4 characters or throwing.
   * For any path ending in a 3-letter extension (e.g. ".txt") the
   * result is the same as chopping the last 4 characters.
   */
  String dfPath= dataFilePath;
  int lastSepIdx= Math.max(dfPath.lastIndexOf('/'),
                           dfPath.lastIndexOf('\\'));
  int extDotIdx= dfPath.lastIndexOf('.');
  String basePath= (extDotIdx>lastSepIdx)
                     ? dfPath.substring(0,extDotIdx)
                     : dfPath;

  this.indexMapFilePath= basePath+".idx";
  this.statIndexMapFilePath= basePath+".sidx";

  /* Setup the local -dropColumns list: -1 here, readDataFileHeader()
   * resets the count to 0 before it builds the list.
   */
  nDropListCols= -1;
  dropListCols= null;
} /* DataRowStatisticsIndexMap */
/**
 * setIndexMapTable() - use an IndexMap FileTable that is already in
 * memory as ftIM.
 * Note that readIndexMapFile() replaces ftIM with a newly read table.
 * @param ftIM is the IndexMap FileTable to use
 */
public void setIndexMapTable(FileTable ftIM)
{ /* setIndexMapTable */
  this.ftIM= ftIM;
} /* setIndexMapTable */
/**
 * setDataTable() - use a data FileTable that is already in memory
 * as ft.
 * Note that readDataFileHeader() replaces ft with a newly read table.
 * @param ft is the data FileTable to use
 */
public void setDataTable(FileTable ft)
{ /* setDataTable */
  this.ft= ft;
} /* setDataTable */
/**
 * setTablePrecision() - set number of digits in output statistics.
 * Any value that is zero or negative is replaced by the default of 2.
 * @param precision is the requested number of digits
 */
public void setTablePrecision(int precision)
{ /* setTablePrecision */
  this.precision= (precision>0) ? precision : 2;
} /* setTablePrecision */
/**
 * createStatisticsIndexMapFile() - create & write Statistics Index Map file.
 * Runs the processing steps in order and stops at the first step that
 * reports failure; later steps are then not executed.
 * NOTE(review): steps [1] and [2] assign newly created FileTables to
 * ftIM and ft, so tables given earlier through setIndexMapTable() or
 * setDataTable() are replaced - confirm this is intended.
 * @return true if every step succeeds
 */
public boolean createStatisticsIndexMapFile()
{ /* createStatisticsIndexMapFile */
  /* The && chain short-circuits, so each step runs only if all of the
   * steps before it returned true.
   *  [1] read the IndexMap FileTable to ftIM.
   *  [2] read the data file FileTable to ft.
   *  [3] compute row statistics of data (max,min,mean,stddev).
   *  [4] compute global stats of (max,min,mean,stddev) data.
   *  [5] add the row stats to the IndexMap ftIM table.
   *  [6] add the global stats to the IndexMap header.
   *  [7] write the extended IndexMap w/statistics as the .sidx file
   *      (ftIM is passed as it stands after the earlier steps).
   */
  boolean allStepsOk=
       readIndexMapFile()
    && readDataFileHeader()
    && computeRowStatistics()
    && computeGlobalStatistics()
    && addRowStatisticsToIndexMap()
    && addGlobalStatisticsToIndexMapHdr()
    && writeStatisticsIndexMap(ftIM);

  return(allStepsOk);
} /* createStatisticsIndexMapFile */
/**
 * readIndexMapFile() - read the IndexMap FileTable to ftIM.
 * A new FileTable is created and loaded from indexMapFilePath, so this
 * can be used if the IndexMap file has not been read yet. On a read
 * failure a message including ftIM.errMsgLog is logged via UtilCM.logMsg().
 * NOTE(review): the values returned by lookupFieldIdx() are stored
 * without being checked here - confirm how a missing column is reported.
 * @return true if succeed with (ftIM, idxStartByte, idxEndByte)
 *         set up, false if the file could not be read and parsed.
 */
public boolean readIndexMapFile()
{ /* readIndexMapFile */
  ftIM= new FileTable("Index-Map-Table");

  boolean parsedOk= ftIM.readAndParseTable(indexMapFilePath);
  if(parsedOk)
  { /* table is loaded - finish setting it up */
    /* Clean (remove) header enclosing whitespace */
    ftIM.trimTableEnclWhitespace(true,false);

    /* Look up the index-map keyword columns */
    idxStartByte= ftIM.lookupFieldIdx("StartByte");
    idxEndByte= ftIM.lookupFieldIdx("EndByte");
    return(true);
  }

  /* Could not read or parse the file: report it and fail */
  UtilCM.logMsg("Problem reading Index Map file '"+
                indexMapFilePath+"' \n "+ftIM.errMsgLog);
  return(false);
} /* readIndexMapFile */
/**
* readDataFileHeader() - read the data file FileTable to ft.
* @return true if succeed with ft set up.
*/
public boolean readDataFileHeader()
{ /* readDataFileHeader */
ft= new FileTable("Index-Map-Table");
if(!ft.readAndParseTableFields(dataFilePath))
{
UtilCM.logMsg("Problem reading data file Table '"+
dataFilePath+"' \n "+ft.errMsgLog);
return(false);
}
/* Clean (remove) header enclosing whitespace */
ft.trimTableEnclWhitespace(true,false);
/* Setup the local -dropColumns list */
nDropListCols=0;
dropListCols= null;
if(cvt.nDropColNames>0)
{ /* make local drop list */
String tFields[]= ft.tFields;
dropListCols= new int[cvt.nDropColNames]; /* worst case */
for(int i=0;i