/** File: MakeTestsIntersectionTbl.java */ /** * Class MakeTestsIntersectionTbl to generate a Tests Intersection Table * from the set of data specified by the {test-TODO-list.txt} table. * It is invoked by the *
* '-makeTestsIntersectionTbl:{testsToDoFile}' switch. * Note: that it also uses switches: * (required) -inputDirectory:{input dir} * (required) -outputDirectory:{output directory} * (optional) -mapHdrNames:{mapHdrNamesFile,fromHdrName,toHdrName} * (optional) -noHTML * (optional) -saveEditedTable2File:{outTblFile,opt. "HTML"} * (optional) -filterTestTestIntersection:{testTableField,d1,d2,...,dn} * (optional) -filterDataTestIntersection:{dataTableField,d1,d2,...,dn} * (optional) -addFCrangesForTestsIntersectionTable * (optional) -addRangeOfMeansToTItable * (optional) -limitMaxTableRows:{maxNbrRows,(opt.)sortByCol,(opt.)'A'or 'D'} * * If HTML is generated, then the usual modifiers are used: * (required) -addProlog:{opt. prolog file name}' * (required) -addEpilogue:{opt. epilogue file name}' * (optional) -alternateRowBackgroundColor:{c} * (optional) -mapDollarsigns:{$$keyword$$,toString} * (optional) -mapQuestionmarks:{??keyword??,toString} ** *
 *
 * It generates a table ftTI (Tests-Intersection) that contains data from
 * the individual tests from the '-inputDataDir:{inputDataDir}' organized
 * by rows of +FC genes/Feature-IDs and -FC genes/Feature-IDs.
 * The data from the {testsToDoFile} is read into ftGTT and is used to
 * get additional information for each test as follows.
 * The multi-row header (starting in the 4th column) includes data
 * from the testsToDoFile for each test used:
 *
* 'Tissue' (from the "Test Name") * 'FC & pValue' (from the "p-Value test thresh." & "fold-change test thresh.") * 'Test label' (from the "HTML page label') * "MnA/MnB" (fixed comment indicating fold-change) * 'testName' (from the "Test Name") * * It is derived from all of the mAdb MRR test results data and * used the mAdb-TestsToDo.xls data for annotation. * * ** Each row has 3 leading params ("Gene" "Feature ID" "Well ID") that * can be hyperlinked in the HTML version of the table. * ** This is followed by the "A-B Mean Difference" value for that test * (for all tests). * ** The test name, tissue, A vs B names, "MnA/MnB" are part of the 5 * line header. * ** The +FC and -FC data are merged (since the same gene can not be found * in both the +FC and -FC reports). * ** All genes found in any test are included in the table. * ** Where a gene does not show up in a test because the FC was below * threshold, that cell is left blank. * ** if adding FC ranges (-addFCrangesForTestsIntersectionTable switch) * then add three columns "Max FC", "Min FC" and "Range FC". * Note that we can sort on these since the sorting is done after * the table is computed. * * The following is a (BOGUS data) example of what it might look like. * * * Erythroid Liver . . . * Stat5KO vs WT WT+GH vs WT-GH . . . * MnA/MnB MnA/MnB . . . * "Gene" "Feature ID" "Well ID" EG1-test-1 EG2-test-1 . . . * ------ ------------ --------- ------------- --------------- * Stat3 1460700_at 123456 1.234 -3.1234 . . . * Sox9 1451538_at 234567 2.1234 . . . * Sox3 1455899_x_at 345678 2.5432 -4.3210 . . . * etc... * ** *
* Data in the Test-ToDo-List Table * Column entry Description * "Affy .CEL file (16)" - The Affymetrix .CEL file name in the * GSP-Inventory.xls Egxxxx worksheets * "Simple GSP ID (10)" - The Simple GSP ID sample name in the * GSP-Inventory.xls Egxxxx worksheets * "GSP ID (9)" - The default GSP ID sample name in the GSP-Inventory.xls * EGxxxx worksheets * "Class A" - The samples in class A for the test * "Class B" - The samples in class B for the test * "t-Test or Fold-Change Test" - A/B indicates a fold change reported * as A samples / B samples. * "Test Name" - The unique name of the test formed from the Egxxx number. * E.g., EG5.1-test-3 * "p-Value test thresh" - If there are at least 2 samples for both A * and B classes, this is the p-value threshold * to be used. It is empty if there are not at * least 2 samples/class. * "fold-change test thresh" - The fold-change threshold to be used. * It is shown as +/-nX since we do the test for * genes > nX and genes < 1/nX and report them * separately. * "HTML page label" - Converter web page content page label * "HTML page description" - Converter web page description content page label * "HTML tissue name" - Converter tissue name used in the Web page and in * possibly in other areas. * "Relative directory" - Relative subdirectory entry in the directory * tree used for a) mAdb tests results (.txt & .zip * files) go, b) converter input data and HTML and * JTV output data, and c) the Jak-Stat Prospector Web * site subdirectory tree. * * Test name usage: File names generated using the 'Test Name' * tests * Tests for samples: * {testName}+FC.txt * {testName}-FC.txt * * The converter output will be a mixture of .txt, .html files, and * processed JTV directories and .zip files. * The JTV .zip files have the .zip removed and an HTML file generated * to start up the JTV applet from the Web page. 
* * List of variables set by switch: * -makeTestsIntersectionTbl:{testsToDoFile} * cvt.makeTestsIntersectionTableFlag * cvt.testsToDoTblFile * cvt.filterTestField * cvt.filterTestsList * cvt.filterDataField * cvt.filterDataList * cvt.addFCrangesForTestsIntersectionTableFlag * cvt.addRangeOfMeansToTItableFlag * * List of Methods* *
* =================== * MakeTestsIntersectionTbl() - Constructor * setTestFilterList() - set list of test keywords for filtering tests. * setDataFilterList() - set list of data keywords for filtering data. * initTestsIntersection() - initialize the input files and I/O paths * initTblHeaderIndexes() - get TestToDo Table header idx variables * getTestToDoRowData() - get and save row data to class 's'prefix variables. * createTestsIntersectionTable() - create the Tests-Intersection Table. * addAllTestsDataToTI() - add all Tests Data to ftTI Table. * addFCdataInstanceToTI() - add gene FC data to Test-Intersection Table. * makeUniqueRelDirList() - generate unique list of Relative Dir. entries. * getAllTestDataForRelDir() - get list of test data for rel-dir in tdfrd. * cvtSimpleGspId2EG() - map Simple GSP ID to an EGxxxx.y * matchTestFilter() - test current test tField name of ft Table * matchDataFilter() - test the tField name of current MRR Table data. * addFCrangesForTestsIntersectionTable() - adding FC range computations * calcRunningMaxMinMeanABvalues() - calc row r ft max and min MeanA(B) values * getFilterSummaryHTML() - get an HTML summary of the test/data filters used. * * Internal Class
* =================== * Class TestDataForRelDir contains lists of rows of data computed as side * effect for computing active lists when calling getAllTestDataForRelDir(). *
* This code is available at the HTMLtools project on SourceForge at * http://htmltools.sourceforge.net/ * under the "Common Public License Version 1.0" * * http://www.opensource.org/licenses/cpl1.0.php.
** It was derived and refactored from the open source * MAExplorer (http://maexplorer.sourceforge.net/), and * Open2Dprot (http://Open2Dprot.sourceforge.net/) Table modules. *
* $Date: 2009/07/20 11:45:56 $ $Revision: 1.28 $
*
* Copyright 2008, 2009 by Peter Lemkin
* E-Mail: lemkin@users.sourceforge.net
* http://lemkingroup.com/
*
*/
public class MakeTestsIntersectionTbl
{
/* Note all global variables are in Globals.java except the help
* messages which are in HelpMsgs.java.
*/
/** The HTMLtools instance handed to the constructor. This class reads
* switch-derived settings from it (e.g. makeTestsIntersectionTableFlag,
* testsToDoTblFile, tableDataDir, inputDataDir and the filter
* field/list pairs).
*/
public HTMLtools
cvt;
/** Global fileTable instance. It is copied from cvt.fio by the
* constructor.
*/
public FileTable
fio;
/** Class to contain lists of rows of data computed as side effect
* for computing active lists when calling getAllTestDataForRelDir().
* A single instance is allocated by the constructor.
*/
private TestDataForRelDir
tdfrd= null;
/** Unique list of Relative Directory entries. It is assigned from
* makeUniqueRelDirList() by initTestsIntersection().
*/
public String
uniqueRelDir[]= null;
/* ---------- FileTables used in the batch generator ----------- */
/** The FileTable created from the {test-ToDo-list.txt} file.
* Set with '-makeTestsIntersectionTbl:{mAdb-TestsToDo.txt}'.
* It is created, read and whitespace-trimmed by initTestsIntersection().
*/
public FileTable
ftGTT= null;
/** The Tests-Intersection FileTable that is built by intersecting the
* genes found in the individual tests data. initTestsIntersection()
* creates it empty with a 5-line header and the three leading fields
* "Gene", "Feature ID" and "Well ID".
*/
public FileTable
ftTI= null;
/* --------- args from the command Switches parser ---------- */
/** The path for the tests directory from the current directory.
* initTestsIntersection() sets it to
* cvt.tableDataDir + cvt.testsToDoTblFile and then maps the file
* separators with fio.mapPathFileSeparators().
*/
private String
testsToDoTblPath= null;
/** Input tree dir path from current directory */
public String
testsInputTreePath= null;
/* ------ index variables for the Test-ToDo ftGTT Table ------- */
/** The index variables for the Test-ToDo ftGTT Table instance
* variables. Each one stays -1 until initTblHeaderIndexes() looks it
* up in ftGTT by the header name shown next to it. If any of them is
* still -1 after the lookup, initTblHeaderIndexes() returns false.
*/
//[REFACTOR] private int idxGTT[]= null;
private int
idxAffyCELfile= -1, /* "Affy .CEL file (16)" */
idxSimpleGSP_ID= -1, /* "Simple GSP ID (10)" */
idxGSP_ID= -1, /* "GSP ID (9)" */
idxClassA= -1, /* "Class A" */
idxClassB= -1, /* "Class B" */
idxFCcalc= -1, /* "t-Test or Fold-Change Test" */
idxTestName= -1, /* "Test Name" */
idx_pValueThr= -1, /* "p-Value test thresh." */
idxFCthr= -1, /* "fold-change test thresh." */
idxHTMLpageLabel= -1, /* "HTML page label" */
idxHTMLpageDescr= -1, /* "HTML page description" */
idxHTMLtissueName= -1, /* "HTML tissue name" */
idxRelDir= -1; /* "Relative directory" */
/** The Test-ToDo ftGTT Table instance variables. Per the method list
* in the file header, getTestToDoRowData() saves the data of one ftGTT
* row into these 's'-prefixed variables. They parallel the idxXXX
* variables above one for one.
*/
//[REFACTOR] private String sRowGTT[]= null;
private String
sAffyCELfile= null,
sSimpleGSP_ID= null,
sGSP_ID= null,
sClassA= null,
sClassB= null,
sFCcalc= null,
sTestName= null,
s_pValueThr= null,
sFCthr= null,
sHTMLpageLabel= null,
sHTMLpageDescr= null,
sHTMLtissueName= null,
sRelDir= null;
/* ------ index variables for the ftTI Tests-Intersection Table ------- */
/** The index variables for the ftTI Tests-Intersection Table instance
* variables. initTestsIntersection() sets them by looking up the
* "Gene", "Feature ID" and "Well ID" fields of the freshly created ftTI.
*/
//[REFACTOR] private int idxTI[]= null;
private int
idxGeneftTI= -1,
idxFeatureIDftTI= -1,
idxWellIDftTI= -1;
/** The following are computed AFTER the TI is built. They stay -1
* until then.
* NOTE(review): judging by the names these index the optional FC-range
* and range-of-means columns ('-addFCrangesForTestsIntersectionTable',
* '-addRangeOfMeansToTItable') - confirm where they are assigned.
*/
private int
idxMinFCftTI= -1,
idxMaxFCftTI= -1,
idxRangeFCftTI= -1,
idxRangeMeanAftTI= -1,
idxRangeMeanBftTI= -1,
idxPercentFCftTI= -1;
/** Data accumulated while reading the individual tests and then
* used to compute the ranges of the means for inclusion in the final
* ftTI Table. Allocate these to MEANS_ALLOC, but then only use the data
* up to ftTI.tRows (before '-limitMaxTableRows' of the Table). The data
* actually used is [0:ftTI.tRows-1].
* Note: only used if '-addRangeOfMeansToTItable' is set.
*/
private float
maxMeanA[]= null,
minMeanA[]= null,
maxMeanB[]= null,
minMeanB[]= null;
/** Allocation size for the xxxMeanX[] (and any rangeMeanX[]) arrays.
* Only the data up to ftTI.tRows (before '-limitMaxTableRows' of the
* Table) is then used.
*/
private final int
MEANS_ALLOC= 100000;
/* ----- top level test and MRR data Filters data ------ */
/**
* Specifies the tField name of Tests-ToDo Table data to test.
* It is set by setTestFilterList() and remains null unless that method
* is given a non-null field name and list. This is used when deciding
* which tests in Tests-ToDo Table to include. All tests are
* included if there is no test filter. Otherwise, only use those
* that match the filter. The matchTestFilter() tests
* the current data against the filter data.
*/
private String
filterTestField= null;
/**
* Specifies the list of test filter instances to test.
* It is set by setTestFilterList() together with filterTestField.
* This is used when deciding which tests in Tests-ToDo Table to
* include. All tests are included if there is no test filter.
* Otherwise, only use those that match the filter. The
* matchTestFilter() tests the current data against the filter data.
*/
private String
filterTestsList[]= null;
/**
* Specifies the tField name of MRR Table data to test. It is set by
* setDataFilterList() and remains null unless that method is given a
* non-null field name and list. This is used when deciding which data
* rows in MRR Table to include.
* NOTE(review): the original comment said "All tests are included if
* there is no filterTests"; presumably all data rows are included when
* there is no data filter - confirm against matchDataFilter().
* The matchDataFilter() tests the current data against the filter data.
*/
private String
filterDataField= null;
/**
* Specifies the list of data filter instances to test. It is set by
* setDataFilterList() together with filterDataField. This is used when
* deciding which data rows in MRR Table to include (see the note on
* filterDataField for the no-filter case).
* The matchDataFilter() tests the current data against the filter data.
*/
private String
filterDataList[]= null;
/** Flag (not a list). The constructor resets it to false.
* NOTE(review): presumably set when the size of one or more tests was
* limited for inclusion in the generated HTML - confirm at the place
* where it is set to true.
*/
public boolean
testsSizeLimitedFlag= false;
/**
 * MakeTestsIntersectionTbl() - Constructor.
 * Keeps the HTMLtools instance and its FileTable helper, allocates the
 * TestDataForRelDir scratch instance, and installs the test and data
 * filters carried by cvt. A filter is left unset when its field name
 * or its list is null.
 * @param cvt is instance of HTMLtools (must not be null, since cvt.fio
 *        and the cvt filter settings are read here)
 * @see #setTestFilterList
 * @see #setDataFilterList
 */
public MakeTestsIntersectionTbl(HTMLtools cvt)
{ /* MakeTestsIntersectionTbl */
  /* Local state that does not depend on cvt. */
  this.testsSizeLimitedFlag= false;
  this.tdfrd= new TestDataForRelDir();

  /* Remember the converter and share its file I/O helper. */
  this.cvt= cvt;
  this.fio= this.cvt.fio;

  /* Install the filters given by the command line switches. */
  setTestFilterList(this.cvt.filterTestField, this.cvt.filterTestsList);
  setDataFilterList(this.cvt.filterDataField, this.cvt.filterDataList);
} /* MakeTestsIntersectionTbl */
/**
 * setTestFilterList() - set list of test keywords for filtering tests.
 * This is used when deciding which tests in Tests-ToDo Table to include.
 * All tests are included if there is no test filter. Otherwise, only
 * use those that match the filter.
 * If either argument is null, nothing is changed (a previously set
 * filter is kept) and false is returned.
 * @param filterTestField - tField name of Tests-ToDo Table data to test
 * @param filterTestsList - list of filter instances to test.
 * @return true if both arguments were non-null and were saved.
 */
public boolean setTestFilterList(String filterTestField,
                                 String filterTestsList[])
{ /* setTestFilterList */
  boolean haveBothArgs= (filterTestField!=null && filterTestsList!=null);

  if(haveBothArgs)
  { /* save the complete filter specification */
    this.filterTestField= filterTestField;
    this.filterTestsList= filterTestsList;
  }

  return(haveBothArgs);
} /* setTestFilterList */
/**
 * setDataFilterList() - set list of data keywords for filtering data.
 * This is used when deciding which data rows in MRR Table to include.
 * If either argument is null, nothing is changed (a previously set
 * filter is kept) and false is returned.
 * @param filterDataField - tField name of MRR Table data to test
 * @param filterDataList - list of filter instances to test.
 * @return true if both arguments were non-null and were saved.
 */
public boolean setDataFilterList(String filterDataField,
                                 String filterDataList[])
{ /* setDataFilterList */
  boolean haveBothArgs= (filterDataField!=null && filterDataList!=null);

  if(haveBothArgs)
  { /* save the complete filter specification */
    this.filterDataField= filterDataField;
    this.filterDataList= filterDataList;
  }

  return(haveBothArgs);
} /* setDataFilterList */
/**
 * initTestsIntersection() - initialize the input files and I/O paths
 * to make sure they are well formed and exist.
 * Steps, in order:
 *  - return false at once if processing was aborted by the GUI or the
 *    Tests-Intersection Table was not requested;
 *  - create, read and whitespace-trim ftGTT (the "Test-ToDo-List" Table)
 *    from testsToDoPath;
 *  - create the empty ftTI (the "Test-Intersection" Table) with a 5-line
 *    header whose last line holds "Gene", "Feature ID", "Well ID", and
 *    look up the three field indexes;
 *  - validate the ftGTT headers with initTblHeaderIndexes();
 *  - generate the unique list of Relative Directory entries;
 *  - set this.testsToDoTblPath from cvt.tableDataDir and
 *    cvt.testsToDoTblFile and log the directories in use.
 * Set by -makeTestsIntersectionTbl:{testsToDoFile}.
 * @param testsToDoPath - path of test ToDo file
 * @return true if data exists, false if any errors
 *
 * @see FileTable
 * @see FileTable#setHasTableHeaderFlag
 * @see FileTable#setRmvTrailingBlankLinesFlag
 * @see FileTable#setRmvTrailingEmptyColumnsFlag
 * @see FileTable#readAndParseTable
 * @see FileTable#trimTableEnclWhitespace
 * @see FileTable#setFieldsToTable
 * @see FileTable#setHeadersToTable
 * @see FileTable#lookupFieldIdx
 * @see FileTable#mapPathFileSeparators
 * @see #initTblHeaderIndexes
 * @see #makeUniqueRelDirList
 * @see UtilCM#logMsg
 */
public boolean initTestsIntersection(String testsToDoPath)
{ /* initTestsIntersection */
  /* [0] Give up if aborted by GUI Cancel or if the table was not
   * requested. The abort flag is checked first.
   */
  if(cvt.isAbortProcessingFlag() || !cvt.makeTestsIntersectionTableFlag)
    return(false);

  /* [1] Read the Test-ToDo Table */
  ftGTT= new FileTable("Test-ToDo-List");
  ftGTT.setHasTableHeaderFlag(true);
  ftGTT.setRmvTrailingBlankLinesFlag(true);
  ftGTT.setRmvTrailingEmptyColumnsFlag(true);
  boolean readOK= ftGTT.readAndParseTable(testsToDoPath);
  if(!readOK)
  { /* report the read/parse failure */
    String readErrMsg= "Problem, -makeTestsIntersectionTbl: file '"
                       + testsToDoPath +
                       "' not found or ill-formed Table.\n"+
                       ftGTT.errMsgLog+"\n";
    UtilCM.logMsg(readErrMsg);
    return(false);
  }

  /* Clean (remove) header and data enclosing whitespace */
  ftGTT.trimTableEnclWhitespace(true,true);

  /* [1.1] Create empty Test-Intersection Table */
  final int nHdrLines= 5;
  ftTI= new FileTable("Test-Intersection");
  ftTI.setHasTableHeaderFlag(true);
  ftTI.setNbrTableHdrLines(nHdrLines);

  /* [1.2] Stuff the initial fields and header. The other fields are
   * added later. The header is 4 lines of empty cells followed by the
   * field names line.
   */
  String tiFieldNames[]= {"Gene", "Feature ID", "Well ID"};
  ftTI.setFieldsToTable(tiFieldNames);
  String tiHeader[][]= new String[nHdrLines][];
  for(int h=0; h<(nHdrLines-1); h++)
    tiHeader[h]= new String[] {"", "", ""};
  tiHeader[nHdrLines-1]= tiFieldNames;
  ftTI.setHeadersToTable(tiHeader,nHdrLines,tiFieldNames.length);

  /* [1.3] Setup the global field indexes */
  idxGeneftTI= ftTI.lookupFieldIdx("Gene");
  idxFeatureIDftTI= ftTI.lookupFieldIdx("Feature ID");
  idxWellIDftTI= ftTI.lookupFieldIdx("Well ID");

  /* [2] Validate the Table as a Test-ToDo-List Table. */
  boolean headersOK= initTblHeaderIndexes();
  if(!headersOK)
  { /* report the bad headers */
    String hdrErrMsg= "Problem, -makeTestsIntersectionTbl: file '"
                      + cvt.testsToDoTblFile +
                      "' missing or ill-formed Table headers.\n";
    UtilCM.logMsg(hdrErrMsg);
    return(false);
  }

  /* [3] Generate unique list of Relative Directory entries. */
  uniqueRelDir= makeUniqueRelDirList();

  /* [4] Setup the tests todo path, then normalize its separators */
  testsToDoTblPath= cvt.tableDataDir + cvt.testsToDoTblFile;
  testsToDoTblPath= fio.mapPathFileSeparators(testsToDoTblPath);

  String dirsMsg= "Using directories:\n"+
                  " testsToDoTblPath '"+testsToDoTblPath+"'\n"+
                  " inputDataDir '"+cvt.inputDataDir+"'\n";
  UtilCM.logMsg(dirsMsg);

  return(true);
} /* initTestsIntersection */
/**
* initTblHeaderIndexes() - get TestToDo Table header idx variables
* [REFACTOR] so that the ftGTT files are not hardwired.
* @return true if there is valid data
* @see FileTable#mapPathFileSeparators
*/
private boolean initTblHeaderIndexes()
{ /* initTblHeaderIndexes */
if(ftGTT==null)
return(false);
/* Setup the indexes for use later in picking apart row data */
idxAffyCELfile= ftGTT.lookupFieldIdx("Affy .CEL file (16)");
idxSimpleGSP_ID= ftGTT.lookupFieldIdx("Simple GSP ID (10)");
idxGSP_ID= ftGTT.lookupFieldIdx("GSP ID (9)");
idxClassA= ftGTT.lookupFieldIdx("Class A");
idxClassB= ftGTT.lookupFieldIdx("Class B");
idxFCcalc= ftGTT.lookupFieldIdx("t-Test or Fold-Change Test");
idxTestName= ftGTT.lookupFieldIdx("Test Name");
idx_pValueThr= ftGTT.lookupFieldIdx("p-Value test thresh.");
idxFCthr= ftGTT.lookupFieldIdx("fold-change test thresh.");
idxHTMLpageLabel= ftGTT.lookupFieldIdx("HTML page label");
idxHTMLpageDescr= ftGTT.lookupFieldIdx("HTML page description");
idxHTMLtissueName= ftGTT.lookupFieldIdx("HTML tissue name");
idxRelDir= ftGTT.lookupFieldIdx("Relative directory");
/* Validate the ftGTT field names */
if(idxAffyCELfile==-1 || idxSimpleGSP_ID==-1 || idxGSP_ID==-1 ||
idxClassA==-1 || idxClassB==-1 || idxFCcalc==-1 ||
idxTestName==-1 || idx_pValueThr==-1 || idxFCthr==-1 ||
idxHTMLpageLabel==-1 || idxHTMLpageDescr==-1 ||
idxHTMLtissueName==-1 || idxRelDir==-1)
{
return(false);
}
/* Adjust the file separator for the Relative Directory entries.*/
for(int r=0;r
";
return(summary);
} /* getFilterSummaryHTML */
/* -------------- class TestDataForRelDir (tdfrd instance) ------------- */
/**
 * Plain data holder for the lists of per-row data that are computed as
 * a side effect of computing the active lists when calling
 * getAllTestDataForRelDir(). It has public fields only and no behavior;
 * every array starts out null and every count starts out 0.
 */
class TestDataForRelDir
{
  /* ------------- per-row lists for a relative directory ------------- */

  /** Size of the xxxRelDir[] arrays of this class. */
  public int nTestDataForRelDir = 0;

  /** Rows found for the relative directory. */
  public int[] rowListForRelDir = null;

  /** Expression Group of each row. */
  public String[] EGlistForRelDir = null;

  /**
   * EG summary title of each row. It is either a single EG name (then
   * the same as the EGlistForRelDir[] entry) or "EGa and EGb" if the
   * A and B classes are different.
   */
  public String[] EGSummaryTitleForRelDir = null;

  /** Class A samples of each row. */
  public String[] classAforRelDir = null;

  /** Class B samples of each row. */
  public String[] classBforRelDir = null;

  /** Test name of each row. */
  public String[] testNameListForRelDir = null;

  /** Page label of each row. */
  public String[] pageLabelListForRelDir = null;

  /** Page description of each row. */
  public String[] pageDescrForRelDir = null;

  /* ----------------- list of unique EGs ------------------ */

  /** Unique Expression Groups; valid entries are [0:nUniqueEGgroups-1]. */
  public String[] uniqueEGgroupsList = null;

  /** Number of valid entries in uniqueEGgroupsList[]. */
  public int nUniqueEGgroups = 0;

  /**
   * TestDataForRelDir() - Constructor. It does nothing beyond the field
   * initializers above.
   */
  public TestDataForRelDir()
  { /* TestDataForRelDir */
  } /* TestDataForRelDir */
} /* end of class TestDataForRelDir */
} /* end of class MakeTestsIntersectionTbl */