/** File: MakeFlipTable.java */ /** * Class MakeFlipTable to flip a Table by either of two methods. *
* In both cases, it effectively transposes rows and columns but has * other functionality as well. The first way is to do it in memory * using an existing Table that contains tData. The second, for very * large table files, is applicable when there are a small number of * columns to be flipped. It works by using the Index-Map for the Table * and then random accesses only the rows specified from the file rather * than from memory for the data to flip. *
* Generate transposed files using random access file indexing to create * a multi-line header (1 line for each column name in the list) using * the list of columns previously specified when generating the index map * file with the '-makeIndexMapFile:{colName1,colName2,...,colNameN}' command. * It analyzes the index map Table and then uses all columns before the * ("StartByte", "EndByte") columns to define the flipped Table header. * See the '-flipColumnName:{flipColumnFile,flipColumnName}' or * '-flipColumnName:{*LIST*,flipColumnName,v1,v2,...vn}' to * restrict which flipped column data to use. * See the '-flipRowFilterNames:{flipRowFilterNamesFile}' to * restrict which flipped row data to use. * It is set by '-flipTableByIndexMap:{flipDataFile,flipIndexMapFile}' * switch. *
* List of Methods
* ===================
* MakeFlipTable() - Constructor
* setFlippedOutputFile() - set the dir and name for flipped output file.
* makeWorkingTables() - Initialize working Tables required for flipping data
* processData() - create the flipped data Table and write it out
* extractDataRowsByColumnFilters() - extract ftData rows by column-filters
* makeListOfSeekDataRowsByColumnFilters() - filter rows from Index Map data
* matchColumnFilterData() - test a row of Index Map data to see if it matches the Column lists
* matchRowFilterData() - test a String data row for a row name match.
* flip_ftRowsTo_ftFlippedTable() - create ftFlippedTable from ftRows Table
* saveFlipTableAsHTMLfile() - save flip Table as HTML file.
* saveFlipTableAsTextfile() - save flip Table as a tab-delimited text file.
* mapMultilineHdrHREFs() - create HTML for ftFlipped Table.
*
* List of Tables
* =================
* ftData - MRR data Table
* ftIndex - index-map Table for the ftData Table file
* ftRows - extracted rows from the ftData table that will be flipped
* ftFlipped - flipped Table constructed from ftRows Table.
** * List of Switches and Globals.java variables set by them * ============================================================ * -flipTableByIndexMap:{flipDataFile,flipIndexMapFile,(opt)maxRows} * cvt.flipTableByIndexMapFlag * cvt.flipDataFile * cvt.flipIndexMapFile * cvt.maxFlipSeekRowsToExtract * * -flipColumnName:{flipColumnFile,flipColumnNames} * cvt.flipColumnFile[0:cvt.nFlipColumns-1] * cvt.flipColumnName[0:cvt.nFlipColumns-1] * cvt.flipColumnValues[0:cvt.nFlipColumns-1][] * cvt.nFlipColumns * * -flipExcludeColumnName:{flipExcludeColumnName} * cvt.flipExcludeColumnName[0:cvt.nFlipExcludeColumns-1] * cvt.nFlipExcludeColumns * * -flipOrderHdrColNames:{colHdrName1,colHdrName2,...,colHdrNameN} * cvt.flipOrderHdrColList[0:nFlipOrderHdrColList-1] * cvt.nFlipOrderHdrColList * * -flipRowFilterNamesFile:{flipRowFilterNamesFile} * cvt.flipRowFilterNamesFile * cvt.flipRowFilterNames * cvt.nFlipRowFilterNames * * *
* This code is available at the HTMLtools project on SourceForge at * http://htmltools.sourceforge.net/ * under the "Common Public License Version 1.0" * * http://www.opensource.org/licenses/cpl1.0.php.
** It was derived and refactored from the open source * MAExplorer (http://maexplorer.sourceforge.net/), and * Open2Dprot (http://Open2Dprot.sourceforge.net/) Table modules. *
* $Date: 2009/12/02 11:45:56 $ $Revision: 1.38 $
*
* Copyright 2008, 2009 by Peter Lemkin
* E-Mail: lemkin@users.sourceforge.net
* http://lemkingroup.com/
*
*/
public class MakeFlipTable
{
/** Note all global variables are in Globals.java. These global variables
* are in Globals.java accessed through HTMLtools cvt instance.
*/
public HTMLtools
cvt;
/** Global fileTable instance */
public FileTable
fio;
/** The output flip .txt file. The HTML file has the same
* base name but has a .html extension instead of the .txt
* extension. This is set by setFlippedOutputFile().
*/
private String
flipOutputFile= null;
/* ----- Tables used ---- */
/** Input data FileTable instance. Since we buffer the Table, this
* just contains the header information.
*/
public FileTable
ftData= null;
/** Input data index-map FileTable instance mapping ftData file
* seeks (start,end) bytes.
*/
public FileTable
ftIndex= null;
/** Extracted subset of data rows FileTable instance from the ftData Table */
public FileTable
ftRows= null;
/** Flipped data FileTable instance constructed from ftRows Table */
public FileTable
ftFlipped= null;
/* ----- Column Filters ---- */
/** These are the list of column name data for filtering each column
* to be used for the new flipped Table headers. The data is
* contained in files cvt.flipColumnFile[0:cvt.nFlipColumns-1] with
* column names cvt.flipColumnName[0:cvt.nFlipColumns-1]. The data
* is in the 2nd dimension of the lists of size
* nFlipColNameFilterData[0:cvt.nFlipColumns-1]
*/
public String
flipColNameFilterData[][]= null;
/** These are the sizes of the list of column name data for filtering
* each column to be used for the new flipped Table headers. The data is
* contained in files cvt.flipColumnFile[0:cvt.nFlipColumns-1] with
* column names cvt.flipColumnName[0:cvt.nFlipColumns-1]. The data
* is in the 2nd dimension of the lists of size
* nFlipColNameFilterData[0:cvt.nFlipColumns-1]
*/
public int
nFlipColNameFilterData[]= null;
/** List of ftIndex.tField[] index of the column names. Note:
* this is synced with idxColData[]. */
public int
idxColIMfilters[]= null;
/** Ordered List of ftData.tField[] indexes of the column names for
* the flipped table header. Set indirectly by
* -flipOrderHdrColNames:{colHdrName1,colHdrName2,...,colHdrNameN}
* switch. */
public int
idxOrderedColHdrNames[]= null;
/* ----- Start/End row seek data ---- */
/** List of "StartByte" ftIndex.tField[] index of the column names */
public int
idxStartByte= -1;
/** List of "EndByte" ftIndex.tField[] index of the column names */
public int
idxEndByte= -1;
/** List of ftData.tField[] indexes of the column names. Note:
* this is synced with idxColIMfilters[]. */
public int
idxColData[]= null;
/** Input data file path. */
public String
flipDataPath= null;
/** Input Index Map file path. */
public String
flipIndexMapPath= null;
/** Processing succeeded and ftFlipped Table is valid. */
public boolean
dataOK= false;
/** Maximum size of start/endSeekByte[] lists for reallocation. */
public int
maxSeekRowsToExtract= 0;
/** Size of start/endSeekByte[] lists for reallocation. */
public int
nSeekRows= 0;
/** List of row numbers corresponding to the seek pointers. */
public int
rowNbrToSeek[]= null;
/** List of start row byte seek pointers. */
public long
startSeekByte[]= null;
/** List of end row byte seek pointers. */
public long
endSeekByte[]= null;
/* ------- Global Statistics Index Map -------- */
/** Global min value name in table */
private String
glbMinRowName= "Global Min";
/** Global max value name in table */
private String
glbMaxRowName= "Global Max";
/** Global mean value name in table */
private String
glbMeanRowName= "Global Mean";
/** Global stdDev value name in table */
private String
glbStdDevRowName= "Global StdDev";
/** Global min value index of name in table */
private int
glbMinRowIndex= -1;
/** Global max value index of name in table */
private int
glbMaxRowIndex= -1;
/** Global mean value index of name in table */
private int
glbMeanRowIndex= -1;
/** Global stdDev value index of name in table */
private int
glbStdDevRowIndex= -1;
/** Sort Table title if any */
private String
sortTableTitle= null;
/** Computation Strings that can be added to the report. The HTML version is for
* the generated HTML file */
public String
sFlipTableReport= "";
public String
sFlipTableReportHTML= "";
/**
 * MakeFlipTable() - Constructor.
 * Keeps the HTMLtools instance and its FileTable (cvt.fio), calls
 * fio.initFileIO() (per the original note, this resets the random
 * access file defaults), records the row-extraction limit and finally
 * clears the remaining working state with initClassVars().
 * @param cvt is instance of HTMLtools
 * @param maxSeekRowsToExtract - max # of rows to extract for flipped
 *        Table; a value <= 0 selects the default of 100
 */
public MakeFlipTable(HTMLtools cvt,
                     int maxSeekRowsToExtract)
{ /* MakeFlipTable */
  this.cvt= cvt;
  this.fio= cvt.fio;
  this.fio.initFileIO();

  /* Non-positive limits fall back to the default of 100 rows */
  this.maxSeekRowsToExtract= (maxSeekRowsToExtract > 0)
                               ? maxSeekRowsToExtract
                               : 100;

  initClassVars();
} /* MakeFlipTable */
/**
 * initClassVars() - reset the class variables.
 * Puts the working Tables, column-filter lists, seek lists, file paths
 * and status flag back to their initial (empty) values.
 * maxSeekRowsToExtract is deliberately left alone: the constructor
 * assigns it just before calling this method.
 */
private void initClassVars()
{ /* initClassVars */
  /* Output file name and input paths */
  flipOutputFile= null;
  flipDataPath= flipIndexMapPath= null;

  /* Working Tables */
  ftData= ftIndex= ftRows= ftFlipped= null;

  /* Column filter data and the column index lists */
  flipColNameFilterData= null;
  nFlipColNameFilterData= idxColIMfilters= null;
  idxOrderedColHdrNames= idxColData= null;

  /* "StartByte"/"EndByte" column indexes are unknown until looked up */
  idxStartByte= idxEndByte= -1;

  /* Row seek lists */
  nSeekRows= 0;
  rowNbrToSeek= null;
  startSeekByte= endSeekByte= null;

  /* No valid flipped Table yet */
  dataOK= false;
} /* initClassVars */
/**
 * setFlippedOutputFile() - set the name for the flipped output file.
 * This method only records the name it is given. According to the
 * original notes the caller derives that name either from the
 * -saveTableAsFile switch, or from the input data file base name with
 * a -addOutputPostfix substring, or "-flipped" in the worst case; that
 * selection logic is not part of this method.
 * @param flipOutputFile - name of the output flipped Table file to be
 *        saved
 * @return always true
 */
public boolean setFlippedOutputFile(String flipOutputFile)
{ /* setFlippedOutputFile */
  this.flipOutputFile= flipOutputFile;
  return true;
} /* setFlippedOutputFile */
/**
* makeWorkingTables() - Initialize working Tables required for flipping data.
* This sets up the FileTables for
*
* ftData - for the input data Table file * ftIndex - for index map Table file corresponding to input data file rows * ftRows - for the filtered data rows to be computed * ftFlipped - for the flipped ftRows data to be computed ** It also reads the '-flipColumnName:{flipColumnFile,flipColumnName}' or * '-flipColumnName:{*LIST*,flipColumnName,v1,v2,...vn}' data, and it * reads the '-flipRowFilterNamesFile:{flipRowNameFile}' filtered * by "-flipRowGSPIDfilterSubstring:"{substring}". * @return true if succeed * * @see FileTable * @see FileTable#readAndParseTableFieldsAndIndexMap * @see FileTable#readAndParseTable * @see FileTable#readFileAsString * @see FileTable#lookupFieldIdx * @see UtilCM#mapCRLF2space * @see UtilCM#replaceSubstrInString * @see UtilCM#cvs2Array * @see UtilCM#logMsg */ public boolean makeWorkingTables() { /* makeWorkingTables */ dataOK= false; /* set to true if ftFlipped Table generation ok. */ /* [1] Check that have minmum switches specified */ if(!cvt.flipTableByIndexMapFlag || cvt.flipDataFile==null || cvt.flipIndexMapFile==null) { /* bad: -flipTableByIndexMap:{flipDataFile,flipIndexMapFile,(opt)maxRows} */ UtilCM.logMsg("Problem, no "+ "'-flipTableByIndexMap:{flipDataFile,flipIndexMapFile}'"+ " switch set.\n"); return(false); } else if(cvt.nFlipOrderHdrColList==0 || cvt.flipOrderHdrColList==null) { /* bad -flipOrderHdrColNames:{colHdrName1,colHdrName2,...,colHdrNameN} */ UtilCM.logMsg("Problem, no "+ "'-flipOrderHdrColNames:{colHdrName1,colHdrName2,...,colHdrNameN}'"+ " switch set.\n"); return(false); } /* [1.1] Compute the data and index file Tables file paths. 
*/ /* Setup the file paths */ cvt.curInputFile= cvt.flipDataFile; /* Used for HTML documentation */ flipDataPath = cvt.inputDataDir + cvt.flipDataFile; flipDataPath= fio.mapPathFileSeparators(flipDataPath); flipIndexMapPath= cvt.inputDataDir + cvt.flipIndexMapFile; flipIndexMapPath= fio.mapPathFileSeparators(flipIndexMapPath); /* [1.2] If doing a heat map by changing the background of cells, then * map the cell value to a background color. */ if(cvt.showDataHeatmapFlipTableFlag) { /* test if .sidx exists */ /* Force the index map file to be .sidx */ if(cvt.flipIndexMapFile.endsWith(".idx")) cvt.flipIndexMapFile= cvt.flipIndexMapFile.substring(0,cvt.flipIndexMapFile.length()-4)+ ".sidx"; String sGlbIMfile= cvt.flipIndexMapFile; flipIndexMapPath= cvt.inputDataDir + cvt.flipIndexMapFile; flipIndexMapPath= fio.mapPathFileSeparators(flipIndexMapPath); if(!fio.fileExists(flipIndexMapPath)) { /* .sidx does not exist */ cvt.hasStatIndexMapFileFlag= false; cvt.flipIndexMapFile= cvt.flipIndexMapFile.substring(0,cvt.flipIndexMapFile.length()-4)+ ".sidx"; flipIndexMapPath= cvt.inputDataDir + cvt.flipIndexMapFile; flipIndexMapPath= fio.mapPathFileSeparators(flipIndexMapPath); cvt.showDataHeatmapFlipTableFlag= false; UtilCM.logMsg("Problem, no '"+sGlbIMfile+ "' exists - ignoring HTML heatmap generation.\n"); } else cvt.hasStatIndexMapFileFlag= true; } /* test if .sidx exists */ /* Update the status line if using the GUI*/ cvt.incrUpdateStatusLine("Making intermediate flip tables for '"+ cvt.flipDataFile+"'"); UtilCM.logMsg("... Note: this will take a few seconds ..."); /* [2] Read the data file and set up the Table */ ftData= new FileTable("Input-Data-Table"); if(!ftData.readAndParseTableFields(flipDataPath)) { UtilCM.logMsg("Problem reading flip data file '"+flipDataPath+ "' \n "+ftData.errMsgLog); return(false); } /* Clean (remove) header enclosing whitespace */ ftData.trimTableEnclWhitespace(true,false); /* [TODO] additional preprocessing of the data Table. 
*/ /* [3] Read the index map file and set up the Table */ ftIndex= new FileTable("Index-Map-Table"); /* If .sidx (Statistics Index Map), it has Global statistics in * header[0:1], [2] is fields. */ if(flipIndexMapPath.endsWith(".sidx")) ftIndex.setNbrTableHdrLines(3); if(!ftIndex.readAndParseTable(flipIndexMapPath)) { UtilCM.logMsg("Problem reading flip Index Map file '"+ flipIndexMapPath+"' \n "+ftIndex.errMsgLog); return(false); } /* Clean (remove) header enclosing whitespace */ ftIndex.trimTableEnclWhitespace(true,false); /* Setup index-map keyword index column values */ ftData.ftIdxMap= ftIndex; idxStartByte= ftIndex.lookupFieldIdx("StartByte"); idxEndByte= ftIndex.lookupFieldIdx("EndByte"); /* [3.1] Setup Global statistics if .sidx file, then * get (Statistics Index Map), it has Global statistics in * * header[0:1]. */ if(flipIndexMapPath.endsWith(".sidx")) getGlobalStatistics(ftIndex); /* [3.2] Make colormap data scale quantile TABLE HTML mapping the * cvt.heatMapColors[] quantiles to positive numeric value in range of * [cvt.glbMinRowVal, cvt.glbMaxRowVal]. Computed by * convert.makeColorMapScaleHTML() if * cvt.showDataHeatmapFlipTableFlag and cvt.hasStatIndexMapFileFlag are set. */ if(cvt.showDataHeatmapFlipTableFlag && cvt.hasStatIndexMapFileFlag) cvt.colorMapScaleHTML= cvt.convert.makeColorMapScaleHTML(); /* [4] Create a Table to hold the data Table filtered rows to be flipped. */ ftRows= ftData.cloneTable("Filtered-Rows"); /* [5] Create a Table to hold the flipped data from the ftRows Table. */ ftFlipped= new FileTable("Flipped-Table"); /* [6] Read the '-flipColumnName:{flipColumnFile,flipColumnName}' data. */ flipColNameFilterData= new String[cvt.nFlipColumns][]; nFlipColNameFilterData= new int[cvt.nFlipColumns]; idxColIMfilters= new int[cvt.nFlipColumns]; idxColData= new int[cvt.nFlipColumns]; for(int c=0;c
";
sExcelLink= UtilCM.replaceSubstrInString(sExcelLink, "\\", "/");
if(cvt.flipRowGSPIDfilters!=null && cvt.flipRowGSPIDfilters.length!=0)
{ /* add it to searchFilterSW list */
sGSPIDrowFilters += "Substring filter to limit sample rows by GSP ID names matches: ";
int k1= (cvt.flipRowGSPIDfilters[0].equals("AND") ||
cvt.flipRowGSPIDfilters[0].equals("OR")) ? 1 : 0;
String sBoolName= (cvt.flipRowGSPIDfilters[0].equals("OR"))
? " OR " : " AND ";
for(int k=k1;k \n"+
"Summary of threshold filtered fold-changes of Search Results Table\n"+
"for (fold-changes >= "+UtilCM.cvf2s(fcThr,4)+") and for"+
" (1.0/fold-changes >= "+UtilCM.cvf2s(fcThr,4)+"): \n"+
""+
"Kept "+nKeptCols+" genes/probes:\n"+sKeptNames+" \n";
UtilCM.logMsg(sFlipTableReport);
return(true);
} /* thresholdFoldChangeColumnsInFlipTable */
} /* end of class MakeFlipTable */
"+
"Note the fold-change values are in the bottom part of the table.\n
";
/* [4] If reporting fold change with reportFoldChangeFlag in flip Table
* reporting, and -flipFCthreshold:{flipFCthreshold}, then
* set the flipFCthrFlag and save the postive value in
* flipFCthreshold.
*/
if(cvt.flipFCthrFlag)
{
boolean ok= thresholdFoldChangeColumnsInFlipTable(ftFlipped,cvt.flipFCthreshold,
fcAB);
if(!ok)
UtilCM.logMsg("\nProblem threshold filtering fold-changes - continuing.\n");
}
return(true);
} /* calcReportFoldChangeABstatistics */
/**
* thresholdFoldChangeColumnsInFlipTable() - remove columns whose fold-change
* is below flipFCthreshold.
* If reporting fold change with reportFoldChangeFlag in flip Table
* reporting, and -flipFCthreshold:{flipFCthreshold}, then
* set the flipFCthrFlag and save the positive value in
* flipFCthreshold.
* Keep columns c with fcAB[c] >= fcThr or with 1/fcAB[c] >= fcThr.
* @param ftF is the ftFlipped table
* @param fcThr is flipFCthreshold
* @param fcAB is the fold-change values for the corresponding columns as ftF
* @return true if successful, false if cvt.flipFCthrFlag is not set or
* fcThr is not positive (the table is then left unchanged)
*/
private boolean thresholdFoldChangeColumnsInFlipTable(FileTable ftF,
float fcThr,
double fcAB[])
{ /* thresholdFoldChangeColumnsInFlipTable */
if(!cvt.flipFCthrFlag || fcThr<=0.0F)
return(false); /* no change */
/* Copy columns that pass the filter test */
int
nCols= ftF.tCols,
nRows= ftF.tRows;
boolean
keepCol[]= new boolean[ftF.tCols];
keepCol[0]= true; /* Always keep first column */
/* Keep ftF.tData columns that pass the test. Check in reverse order
* so that delete columns from the right end of the table first.
*/
int
nKeptCols= 0,
nRemoved= 0;
String
colName,
sKeptNames= "",
sRemovedNames= "";
for(int c=(nCols-1);c>=1;c--)
{ /* Keep columns passing the filter */
float fc= (float)fcAB[c];
if(fc>=fcThr || (1.0F/fc)>=fcThr)
{ /* Keep the column */
colName= "["+ftF.tHeader[0][c]+" "+ftF.tHeader[2][c]+"]\n";
sKeptNames += colName+" ";
keepCol[c]= true;
nKeptCols++;
}
else
{ /* Remove the column */
colName= "["+ftF.tHeader[0][c]+" "+ftF.tHeader[2][c]+"]\n";
sRemovedNames += colName+" ";
keepCol[c]= false;
ftF.deleteTableColumnByColIdx(c); /* does the heavy lifting to table */
nRemoved++;
}
} /* Keep columns passing the filter */
sFlipTableReport += "\n" +
"------------------------------------------------------------------\n"+
"Summary of threshold filtered fold-changes of Search Results Table\n"+
"for fold-changes >= "+UtilCM.cvf2s(fcThr,4)+" and for"+
" 1.0/fold-changes >= "+UtilCM.cvf2s(fcThr,4)+":\n"+
"\nKept "+nKeptCols+" genes/probes:\n"+sKeptNames+"\n"+
"Removed "+nRemoved+" genes/probes:\n"+sRemovedNames+"\n";
sFlipTableReportHTML += "
\n"+
"Removed "+nRemoved+" genes/probes:\n"+sRemovedNames+
"