/** File: MakeTestsIntersectionTbl.java */ /** * Class MakeTestsIntersectionTbl to generate a Tests Intersection Table * from the set of data specified by the {test-TODO-list.txt} table. * It is invoked by the *
 * '-makeTestsIntersectionTbl:{testsToDoFile}' switch.
 * Note that it also uses the following switches:
 *  (required)  -inputDirectory:{input dir} 
 *  (required)  -outputDirectory:{output directory}
 *  (optional)  -mapHdrNames:{mapHdrNamesFile,fromHdrName,toHdrName}
 *  (optional)  -noHTML
 *  (optional)  -saveEditedTable2File:{outTblFile,opt. "HTML"}
 *  (optional)  -filterTestTestIntersection:{testTableField,d1,d2,...,dn}
 *  (optional)  -filterDataTestIntersection:{dataTableField,d1,d2,...,dn}
 *  (optional)  -addFCrangesForTestsIntersectionTable
 *  (optional)  -addRangeOfMeansToTItable
 *  (optional)  -limitMaxTableRows:{maxNbrRows,(opt.)sortByCol,(opt.)'A'or 'D'}
 *  
 * If HTML is generated, then the usual modifiers are used:
 *  (required)  -addProlog:{opt. prolog file name}'
 *  (required)  -addEpilogue:{opt. epilogue file name}'
 *  (optional)  -alternateRowBackgroundColor:{c} 
 *  (optional)  -mapDollarsigns:{$$keyword$$,toString} 
 *  (optional)  -mapQuestionmarks:{??keyword??,toString}
 * 
* *

 *
 * It generates a table ftTI (Tests-Intersection) that contains data from
 * the individual tests from the '-inputDataDir:{inputDataDir}' organized
 * by rows of +FC genes/Feature-IDs and -FC genes/Feature-IDs.
 * The data from the {testsToDoFile} is read into ftGTT and is used to
 * get additional information for each test as follows.
 * The multi-row header includes (starting in the 4th column) data
 * from the testsToDoFile for each test used:
 *

 *    'Tissue'          (from the "Test Name")
 *    'FC & pValue' (from the "p-Value test thresh." & "fold-change test thresh.")
 *    'Test label'      (from the "HTML page label")
 *    "MnA/MnB"         (fixed comment indicating fold-change)
 *    'testName'        (from the "Test Name")
 *
 * It is derived from all of the mAdb MRR test results data and
 * uses the mAdb-TestsToDo.xls data for annotation.
 *
 *  ** Each row has 3 leading params  ("Gene" "Feature ID" "Well ID") that 
 *     can be hyperlinked in the HTML version of the table. 
 *  ** This is followed by the "A-B Mean Difference" value for that test
 *     (for all tests).
 *  ** The test name, tissue, A vs B names, "MnA/MnB" are part of the 5 
 *     line header.
 *  ** The +FC and -FC data are merged (since the same gene can not be found
 *     in both the +FC and -FC reports).
 *  ** All genes found in any test are included in the table. 
 *  ** Where a gene does not show up in a test because the FC was below
 *     threshold, that cell is left blank.
 *  ** if adding FC ranges (-addFCrangesForTestsIntersectionTable switch)
 *     then add three columns "Max FC", "Min FC" and "Range FC".
 *     Note that we can sort on these since the sorting is done after
 *     the table is computed.
 *
 * The following is a (BOGUS data) example of what it might look like.
 *
 *   
 *                                  Erythroid      Liver           . . .
 *                                  Stat5KO vs WT  WT+GH vs WT-GH  . . .
 *                                  MnA/MnB        MnA/MnB         . . .
 *   "Gene" "Feature ID" "Well ID"  EG1-test-1     EG2-test-1      . . .
 *   ------ ------------ ---------  -------------  ---------------
 *   Stat3  1460700_at   123456      1.234         -3.1234         . . .
 *   Sox9   1451538_at   234567                     2.1234         . . .
 *   Sox3   1455899_x_at 345678      2.5432        -4.3210         . . .
 *           etc...
 * 
 *
* *
 * Data in the Test-ToDo-List Table
 * Column entry    Description 
 * "Affy .CEL file (16)" - The Affymetrix .CEL file name in the 
 *                        GSP-Inventory.xls Egxxxx worksheets
 * "Simple GSP ID (10)" - The Simple GSP ID sample name in the 
 *                        GSP-Inventory.xls Egxxxx worksheets
 * "GSP ID (9)" - The default GSP ID sample name in the GSP-Inventory.xls 
 *                        EGxxxx worksheets
 * "Class A" - The samples in class A for the test
 * "Class B" - The samples in class B for the test
 * "t-Test or Fold-Change Test" - A/B indicates a fold change reported
 *                        as A samples / B samples.
 * "Test Name" - The unique name of the test  formed from the Egxxx number.
 *                        E.g., EG5.1-test-3
 * "p-Value test thresh" -  If there are at least 2 samples for both A 
 *                        and B classes, this is the p-value threshold 
 *                        to be used. It is empty if there are not at 
 *                        least 2 samples/class.
 * "fold-change test thresh" -  The fold-change threshold to be used. 
 *                        It is shown as +/-nX since we do the test for 
 *                        genes > nX and genes < 1/nX and report them 
 *                        separately.
 * "HTML page label" - Converter web page content page label
 * "HTML page description" - Converter web page description content page label
 * "HTML tissue name" - Converter tissue name used in the Web page and
 *                        possibly in other areas.
 * "Relative directory" - Relative subdirectory entry in the directory 
 *                        tree used for a) mAdb tests results (.txt & .zip 
 *                        files) go, b) converter input data and HTML and 
 *                        JTV output data, and c) the Jak-Stat Prospector Web 
 *                        site subdirectory tree.
 * 
 * Test name usage:  File names generated using the 'Test Name'
 * tests
 * Tests for samples:     
 *  {testName}+FC.txt       
 *  {testName}-FC.txt
 * 
 * The converter output will be a mixture of .txt, .html files, and 
 * processed JTV directories and .zip files.
 * The JTV .zip files have the .zip removed and an HTML file generated 
 * to start up the JTV applet from the Web page.
 * 
 * List of variables set by switch:
 * -makeTestsIntersectionTbl:{testsToDoFile}
 * cvt.makeTestsIntersectionTableFlag
 * cvt.testsToDoTblFile
 * cvt.filterTestField
 * cvt.filterTestsList
 * cvt.filterDataField
 * cvt.filterDataList
 * cvt.addFCrangesForTestsIntersectionTableFlag
 * cvt.addRangeOfMeansToTItableFlag
 *
 * List of Methods
* =================== * MakeTestsIntersectionTbl() - Constructor * setTestFilterList() - set list of test keywords for filtering tests. * setDataFilterList() - set list of data keywords for filtering data. * initTestsIntersection() - initialize the input files and I/O paths * initTblHeaderIndexes() - get TestToDo Table header idx variables * getTestToDoRowData() - get and save row data to class 's'prefix variables. * createTestsIntersectionTable() - create the Tests-Intersection Table. * addAllTestsDataToTI() - add all Tests Data to ftTI Table. * addFCdataInstanceToTI() - add gene FC data to Test-Intersection Table. * makeUniqueRelDirList() - generate unique list of Relative Dir. entries. * getAllTestDataForRelDir() - get list of test data for rel-dir in tdfrd. * cvtSimpleGspId2EG() - map Simple GSP ID to an EGxxxx.y * matchTestFilter() - test current test tField name of ft Table * matchDataFilter() - test the tField name of current MRR Table data. * addFCrangesForTestsIntersectionTable() - adding FC range computations * calcRunningMaxMinMeanABvalues() - calc row r ft max and min MeanA(B) values * getFilterSummaryHTML() - get an HTML summary of the test/data filters used. * * Internal Class
* =================== * Class TestDataForRelDir contains lists of rows of data computed as side * effect for computing active lists when calling getAllTestDataForRelDir(). *
* *

* This code is available at the HTMLtools project on SourceForge at * http://htmltools.sourceforge.net/ * under the "Common Public License Version 1.0" * * http://www.opensource.org/licenses/cpl1.0.php.

*

* It was derived and refactored from the open source * MAExplorer (http://maexplorer.sourceforge.net/), and * Open2Dprot (http://Open2Dprot.sourceforge.net/) Table modules. *

* $Date: 2009/07/20 11:45:56 $ $Revision: 1.28 $ *
* Copyright 2008, 2009 by Peter Lemkin * E-Mail: lemkin@users.sourceforge.net * http://lemkingroup.com/ *
*/ public class MakeTestsIntersectionTbl { /* Note all global variables are in Globals.java except the help * messages which are in HelpMsgs.java. */ public HTMLtools cvt; /** Global fileTable instance */ public FileTable fio; /** Class to contain lists of rows of data computed as side effect * for computing active lists when calling getAllTestDataForRelDir(). */ private TestDataForRelDir tdfrd= null; /** Unique list of Relative Directory entries. */ public String uniqueRelDir[]= null; /* ---------- FileTables used in the batch generator ----------- */ /** The FileTable created from the {test-ToDo-list.txt} file. * Set with '-MakeTestsIntersectionTbl:{mAdb-TestsToDo.txt}'. */ public FileTable ftGTT= null; /** The FileTable Test-Intersection Table created from the tests data * that intersection the genes in the tests. */ public FileTable ftTI= null; /* --------- args from the command Switches parser ---------- */ /** The path for the tests directory from the current directory. */ private String testsToDoTblPath= null; /** Input tree dir path from current directory */ public String testsInputTreePath= null; /* ------ index variables for the Test-ToDo ftGTT Table ------- */ /** The index variables for the Test-ToDo ftGTT Table instance * variables. 
*/ //[REFACTOR] private int idxGTT[]= null; private int idxAffyCELfile= -1, idxSimpleGSP_ID= -1, idxGSP_ID= -1, idxClassA= -1, idxClassB= -1, idxFCcalc= -1, idxTestName= -1, idx_pValueThr= -1, idxFCthr= -1, idxHTMLpageLabel= -1, idxHTMLpageDescr= -1, idxHTMLtissueName= -1, idxRelDir= -1; /** The Test-ToDo ftGTT Table instance variables */ //[REFACTOR] private String sRowGTT[]= null; private String sAffyCELfile= null, sSimpleGSP_ID= null, sGSP_ID= null, sClassA= null, sClassB= null, sFCcalc= null, sTestName= null, s_pValueThr= null, sFCthr= null, sHTMLpageLabel= null, sHTMLpageDescr= null, sHTMLtissueName= null, sRelDir= null; /* ------ index variables for the ftTI Tests-Intersection Table ------- */ /** The index variables for the ftTI Tests-Intersection Table instance * variables. */ //[REFACTOR] private int idxTI[]= null; private int idxGeneftTI= -1, idxFeatureIDftTI= -1, idxWellIDftTI= -1; /** The following are computed AFTER the TI is built */ private int idxMinFCftTI= -1, idxMaxFCftTI= -1, idxRangeFCftTI= -1, idxRangeMeanAftTI= -1, idxRangeMeanBftTI= -1, idxPercentFCftTI= -1; /** Data accumulated while reading the individual tests and then * used to compute the ranges of the means for inclusion in the final * ftTI Table. Allocate these to MEANS_ALLOC, but then only use the data * up to ftTI.tRows (before -limitMaxRows of the Table). The data * actually used is [0:ftTI.tRows-1]. * Note: only used if '-addRangeOfMeansToTItable' is set. */ private float maxMeanA[]= null, minMeanA[]= null, maxMeanB[]= null, minMeanB[]= null; /** Allocate xxxMeanX[] and rangeMeanX[] to MEANS_ALLOC, but then * only use the data up to ftTI.tRows (before -limitMaxRows of the * Table). */ private final int MEANS_ALLOC= 100000; /* ----- top level test and MRR data Filters data ------ */ /** * Specifies the tField name of Tests-ToDo Table data to test. * It is set by setTestFilterList(). This is used when deciding * which tests in Tests-ToDo Table to include. 
All tests are * included if there is no filterTests. Otherwise, only use those * that match the filter. The matchTestFilter() tests * the current data against the filter data. */ private String filterTestField= null; /** * Specifies the list of filter instances to test. * It is set by setTestFilterList(). This is used when deciding * which tests in Tests-ToDo Table to include. All tests are * included if there is no filterTests. Otherwise, only * use those that match the filter. The matchTestFilter() tests * the current data against the filter data. */ private String filterTestsList[]= null; /** * Specifies the tField name of MRR Table data to test. It is set by * setDataFilterList(). This is used when deciding which data rows * in MRR Table to include. All tests are included if there is no * filterTests. Otherwise, only use those that match the filter. * The matchDataFilter() tests the current data against the filter data. */ private String filterDataField= null; /** * Specifies the list of filter instances to test. It is set by * setDataFilterList(). This is used when deciding which data rows * in MRR Table to include. All tests are included if there is no * filterTests. Otherwise, only use those that match the filter. * The matchDataFilter() tests the current data against the filter data. */ private String filterDataList[]= null; /* List of tests whose size was limited for inclusion in the generated * HTML. 
*/ public boolean testsSizeLimitedFlag= false; /** * MakeTestsIntersectionTbl() - Constructor * @param cvt is instance of HTMLtools * @see #setTestFilterList * @see #setDataFilterList */ public MakeTestsIntersectionTbl(HTMLtools cvt) { /* MakeTestsIntersectionTbl */ this.cvt= cvt; this.fio= cvt.fio; tdfrd= new TestDataForRelDir(); setTestFilterList(cvt.filterTestField,cvt.filterTestsList); setDataFilterList(cvt.filterDataField,cvt.filterDataList); testsSizeLimitedFlag= false; } /* MakeTestsIntersectionTbl */ /** * setTestFilterList() - set list of test keywords for filtering tests. * This is used when deciding which tests in Tests-ToDo Table to include. * All tests are included if there is no filterTests. Otherwise, only * use those that match the filter. * @param filterTestField - tField name of Tests-ToDo Table data to test * @param filterTests - list of filter instances to test. * @return true if valid test data. */ public boolean setTestFilterList(String filterTestField, String filterTestsList[]) { /* setTestFilterList */ if(filterTestField==null || filterTestsList==null) return(false); this.filterTestField= filterTestField; this.filterTestsList= filterTestsList; return(true); } /* setTestFilterList */ /** * setDataFilterList() - set list of data keywords for filtering data. * This is used when deciding which data rows in MRR Table to include. * All tests are included if there is no filterTests. Otherwise, only * use those that match the filter. * @param filterDataField - tField name of MRR Table data to test * @param filterDataList - list of filter instances to test. * @return true if valid test data. 
*/ public boolean setDataFilterList(String filterDataField, String filterDataList[]) { /* setDataFilterList */ if(filterDataField==null || filterDataList==null) return(false); this.filterDataField= filterDataField; this.filterDataList= filterDataList; return(true); } /* setDataFilterList */ /** * initTestsIntersection() - initialize the input files and I/O paths * to make sure they are well formed and exist. * It creates, loads and validates the ftGTT (the "Test-ToDo-List" Table). * It creates ftTI (the "Test-Intersection" Table) and sets up the * header and the field indexes. * It generates the unique list of Relative Directory entries. * It sets up the this.testsToDoTblPath * Set by -makeTestsIntersectionTable:{testsToDoFile}. * @param testsToDoPath - path of test ToDo file * @return true if data exists, false if any errors * * @see FileTable * @see FileTable#setHasTableHeaderFlag * @see FileTable#setRmvTrailingBlankLinesFlag * @see FileTable#setRmvTrailingEmptyColumnsFlag * @see FileTable#readAndParseTableAll * @see FileTable#trimTableEnclWhitespace * @see FileTable#setFieldsToTable * @see FileTable#setHeadersToTable * @see FileTable#lookupFieldIdx * @see FileTable#mapPathFileSeparators * @see #initTblHeaderIndexes * @see #makeUniqueRelDirList * @see UtilCM#logMsg */ public boolean initTestsIntersection(String testsToDoPath) { /* initTestsIntersection */ /*Check if abort processing by GUI Cancel */ if(cvt.isAbortProcessingFlag()) return(false); if (! 
cvt.makeTestsIntersectionTableFlag) return(false); /* [1] Read the Test-ToDo Table */ ftGTT= new FileTable("Test-ToDo-List"); ftGTT.setHasTableHeaderFlag(true); ftGTT.setRmvTrailingBlankLinesFlag(true); ftGTT.setRmvTrailingEmptyColumnsFlag(true); if(!ftGTT.readAndParseTable(testsToDoPath)) { UtilCM.logMsg("Problem, -makeTestsIntersectionTbl: file '" + testsToDoPath + "' not found or ill-formed Table.\n"+ ftGTT.errMsgLog+"\n"); return(false); } /* Clean (remove) header and data enclosing whitespace */ ftGTT.trimTableEnclWhitespace(true,true); /* [1.1] Create empty Test-Intersection Table */ ftTI= new FileTable("Test-Intersection"); ftTI.setHasTableHeaderFlag(true); ftTI.setNbrTableHdrLines(5); /* [1.2] Stuff the initial fields and header. We add the other fields later. */ String fieldNames[]= {"Gene", "Feature ID", "Well ID"}; ftTI.setFieldsToTable(fieldNames); String hdrNames[][]= { {"", "", ""}, {"", "", ""}, {"", "", ""}, {"", "", ""}, fieldNames }; ftTI.setHeadersToTable(hdrNames,5,3); /* [1.3] Setup the global field indexes */ idxGeneftTI= ftTI.lookupFieldIdx("Gene"); idxFeatureIDftTI= ftTI.lookupFieldIdx("Feature ID"); idxWellIDftTI= ftTI.lookupFieldIdx("Well ID"); /* [2] Validate the Table as a Test-ToDo-List Table. */ if(!initTblHeaderIndexes()) { UtilCM.logMsg("Problem, -makeTestsIntersectionTbl: file '" + cvt.testsToDoTblFile + "' missing or ill-formed Table headers.\n"); return(false); } /* [3] Generate unique list of Relative Directory entries. */ uniqueRelDir= makeUniqueRelDirList(); /* [4] Setup the tests todo path */ testsToDoTblPath= cvt.tableDataDir + cvt.testsToDoTblFile; testsToDoTblPath= fio.mapPathFileSeparators(testsToDoTblPath); UtilCM.logMsg("Using directories:\n"+ " testsToDoTblPath '"+testsToDoTblPath+"'\n"+ " inputDataDir '"+cvt.inputDataDir+"'\n"); return(true); } /* initTestsIntersection */ /** * initTblHeaderIndexes() - get TestToDo Table header idx variables * [REFACTOR] so that the ftGTT files are not hardwired. 
* @return true if there is valid data * @see FileTable#mapPathFileSeparators */ private boolean initTblHeaderIndexes() { /* initTblHeaderIndexes */ if(ftGTT==null) return(false); /* Setup the indexes for use later in picking apart row data */ idxAffyCELfile= ftGTT.lookupFieldIdx("Affy .CEL file (16)"); idxSimpleGSP_ID= ftGTT.lookupFieldIdx("Simple GSP ID (10)"); idxGSP_ID= ftGTT.lookupFieldIdx("GSP ID (9)"); idxClassA= ftGTT.lookupFieldIdx("Class A"); idxClassB= ftGTT.lookupFieldIdx("Class B"); idxFCcalc= ftGTT.lookupFieldIdx("t-Test or Fold-Change Test"); idxTestName= ftGTT.lookupFieldIdx("Test Name"); idx_pValueThr= ftGTT.lookupFieldIdx("p-Value test thresh."); idxFCthr= ftGTT.lookupFieldIdx("fold-change test thresh."); idxHTMLpageLabel= ftGTT.lookupFieldIdx("HTML page label"); idxHTMLpageDescr= ftGTT.lookupFieldIdx("HTML page description"); idxHTMLtissueName= ftGTT.lookupFieldIdx("HTML tissue name"); idxRelDir= ftGTT.lookupFieldIdx("Relative directory"); /* Validate the ftGTT field names */ if(idxAffyCELfile==-1 || idxSimpleGSP_ID==-1 || idxGSP_ID==-1 || idxClassA==-1 || idxClassB==-1 || idxFCcalc==-1 || idxTestName==-1 || idx_pValueThr==-1 || idxFCthr==-1 || idxHTMLpageLabel==-1 || idxHTMLpageDescr==-1 || idxHTMLtissueName==-1 || idxRelDir==-1) { return(false); } /* Adjust the file separator for the Relative Directory entries.*/ for(int r=0;r=ftGTT.tRows) return(false); /* Allocate the Test-ToDo Table instance variables */ String rowData[]= ftGTT.tData[r]; sAffyCELfile= rowData[idxAffyCELfile]; sSimpleGSP_ID= rowData[idxSimpleGSP_ID]; sGSP_ID= rowData[idxGSP_ID]; sClassA= rowData[idxClassA]; sClassB= rowData[idxClassB]; sFCcalc= rowData[idxFCcalc]; sTestName= rowData[idxTestName]; s_pValueThr= rowData[idx_pValueThr]; sFCthr= rowData[idxFCthr]; sHTMLpageLabel= rowData[idxHTMLpageLabel]; sHTMLpageDescr= rowData[idxHTMLpageDescr]; sHTMLtissueName= rowData[idxHTMLtissueName]; sRelDir= rowData[idxRelDir]; return(true); } /* getTestToDoRowData */ /** * 
createTestsIntersectionTable() - create the Tests-Intersection Table. * Process the Test-ToDo Table for each Rel.Dir. Filter out * tests that do not match the -filterTest or -filterData filters * then compute the FC for each gene row and then add a new * labeled column to the ftTI Table. * If adding FC range computations, then expand the table and * add the three columns "Min FC" "Max FC" FC Range". * This is set by the '-addFCrangesForTestsIntersectionTable' switch. * @return true if created the Tests-Intersection Table. * * @see #getAllTestDataForRelDir * @see #matchTestFilter * @see #addAllTestsDataToTI * @see #addFCrangesForTestsIntersectionTable * @see UtilCM#logMsg */ public boolean createTestsIntersectionTable() { /* createTestsIntersectionTable */ /* [1] Initialization */ int tstNbr= 0, nRelDir= uniqueRelDir.length; /* [1.1] Allocate tables for data accumulated while reading the individual * tests and then used to compute the ranges of the means for inclusion * in the final ftTI Table. Allocate these to 100K, but then only use the * data up to ftTI.tRows (before -limitMaxRows of the Table). The data * actually used is [0:ftTI.tRows-1]. Note: if '-addRangeOfMeansToTItable' * is set */ if(cvt.addRangeOfMeansToTItableFlag) { /* allocate the data arrays */ maxMeanA= new float[MEANS_ALLOC]; minMeanA= new float[MEANS_ALLOC]; maxMeanB= new float[MEANS_ALLOC]; minMeanB= new float[MEANS_ALLOC]; for(int i= 0; i < MEANS_ALLOC; i++) { /* set default values which will be overidden where data exists */ maxMeanA[i]= -1.0F; minMeanA[i]= 100000000.0F; maxMeanB[i]= -1.0F; minMeanB[i]= 100000000.0F; } } /* allocate the data arrays */ /* [2] Process the Test-ToDo Table for each Rel.Dir. Filter out * tests that do not match the -filterTest or -filterData filters * then compute the FC for each gene row and then add a new * labeled column to the ftTI Table. 
*/ for(int d=0;dcvt.limitMaxTableRows) { /* check if limit max rows */ int nTIrows= ftTI.tRows, nTIrowsKept= nTIrows; if(cvt.limitMaxTableRowsFlag) { /* Limit the maximum number of rows in the Table */ boolean useAbsValueFlag= false, /* since "Range FC" >= 0 */ doSortFirstFlag= true, /* sort before limit */ limitOK= ftTI.limitMaxRowsSortedByField(cvt.limitRowsSortByColName, cvt.limitRowsSortAscendingFlag, cvt.limitMaxTableRows, doSortFirstFlag, useAbsValueFlag); if(ftTI.errMsgLog.length()>0) { UtilCM.logMsg(ftTI.errMsgLog+"\n"); ftTI.errMsgLog= ""; } /* Report the changes */ nTIrowsKept= ftTI.tRows; if(nTIrows>nTIrowsKept) { UtilCM.logMsg("Limited size of Tests-Intersection Table from "+ nTIrows+" rows to "+nTIrowsKept+" rows.\n"); testsSizeLimitedFlag= true; } } /* Limit the maximum number of rows in the Table */ } /* check if limit max rows */ return(true); } /* createTestsIntersectionTable */ /** * addAllTestsDataToTI() - add all Tests Data to ftTI Table for a Tissue. * This is done by computing the compute the FC for each gene row * where it will be either +FC or -FC since they are mutually exclusive * and then add a new labeled column to the ftTI Table. * @param ftGTT - tests-todo Table used in the filter if we are filtering. * @param paramTestPath - directory to put parameter map files * @param tissueName - is the main CellTypeTissue for header * @param relativeDir - is the main Relative Directory for header * where to put the HTML files. 
* @return true if succeed, else 0 * * @see FileTable#mapPathFileSeparators * @see FileTable#setHasTableHeaderFlag * @see FileTable#setDuplicateFieldsFlag * @see FileTable#setNbrTableHdrLines * @see FileTable#setRmvTrailingBlankLinesFlag * @see FileTable#setRmvTrailingEmptyColumnsFlag * @see FileTable#setHasEmptyLineBeforeTableFlag * @see FileTable#setStartTableAtColStr * @see FileTable#readAndParseTableAll * @see #addFCdataInstanceToTI * @see UtilCM#logMsg */ private boolean addAllTestsDataToTI(FileTable ftGTT, String tissueName, String relativeDir) { /* addAllTestsDataToTI */ /* [1] Init */ /* [1.1] Get the relative Analysis directories that put into * the input/output Dir switches. */ String inputRelDir= cvt.inputDataDir+relativeDir; /* Make sure that it is standard Unix specification for paths */ inputRelDir= fio.mapPathFileSeparators(inputRelDir); /* [2] Process all of the tests associated with this tissue. */ int tNbr= 0; /* [2.1] Add lists of test permutations for this test. */ for(int k=0;k0) for(int rp=0;rp0) for(int rm=0;rm * It computes the sorted lists [0:nTestDataForRelDir-1] of data for * documenting the tests for the relative directory. * (EGlistForRelDir, EGSummaryTitleForRelDir, classAforRelDir, * classBforRelDir, pageLabelListForRelDir, pageDescrForRelDir, * testNameListForRelDir, rowListForRelDir). * It also computes a list uniqueEGgroups[0:-1]. * * The data is saved, in global class instance tdfrd, as a list of rows of * matching test data found. * @param lookForRelDir is the relative directory to look for * @return true if succeed with data left in class variables, * else false if an error. 
*/ private boolean getAllTestDataForRelDir(String relDir) { /* makeUniqueRelDirList */ if(relDir==null || ftGTT==null || ftGTT.tRows==0) return(false); int nRows= ftGTT.tRows, workRowListForRelDir[]= new int[nRows], nTests= 0, nUnique= 0; String uniqueEGgroups[]= new String[nRows], workEGlistForRelDir[]= new String[nRows], workEGSummaryTitleForRelDir[]= new String[nRows], workClassAforRelDir[]= new String[nRows], workClassBforRelDir[]= new String[nRows], workPageLabelListForRelDir[]= new String[nRows], workPageDescrForRelDir[]= new String[nRows], workTestNameListForRelDir[]= new String[nRows]; /* [1] Create list of test data for a relative directory in the * tdfrd. class instance. */ for(int pass=1;pass<=2;pass++) { /* insert A=B in first pass, A!=B in 2nd pass */ for(int r=0;r=ft.tRows) return(true); int idxFilter= ft.lookupFieldIdx(filterTestField); if(idxFilter==-1) return(true); /* Bad field name, let it pass */ int lth= filterTestsList.length; String cellToTest= ft.tData[rowToTest][idxFilter]; for(int i=0;i=ftMRR.tRows) return(true); int idxFilter= ftMRR.lookupFieldIdx(filterDataField); if(idxFilter==-1) return(true); /* Bad field name, let it pass */ int lth= filterDataList.length; String cellToTest= ftMRR.tData[rowNbr][idxFilter]; for(int i=0;i0) countFCs++; /* has FC data */ } int percentFC= (int)((100.0*countFCs)/totalFCs); commonData[r]= (rangeData[r]==0.0F) ? "" : (""+percentFC); } String newPercentFChdr[]= {"","","","","FC counts %"}; if(!ftTI.addColumnToTable("FC counts %",newPercentFChdr,commonData,true)) { UtilCM.logMsg("Problem adding new 'Range Mn A' column data for TI.\n"); return(false); } /* [4.2.4] Lookup the indexes. 
*/ idxRangeMeanAftTI= ftTI.lookupFieldIdx("Range Mean A"); idxRangeMeanBftTI= ftTI.lookupFieldIdx("Range Mean B"); idxPercentFCftTI= ftTI.lookupFieldIdx("FC counts %"); } /* compute the means and counts */ return(true); } /* addFCrangesForTestsIntersectionTable */ /** * calcRunningMaxMinMeanABvalues() - calc row r ftData max and min MeanA(B) values * Get the data from the ft fields "A Mean" and "B Mean". * Save data in maxMeanA[rM], minMeanA[rM], maxMeanB[rM],minMeanB[rM] * @param rM index for max/min arrays and matches ftTI data. * @param rD is row to get means data from the current test Table * @param ftD is the data table to use * @param idxMeanA_FC is the index of "A Mean" * @param idxMeanB_FC is the index of "B Mean" * @return true if succeed. */ private boolean calcRunningMaxMinMeanABvalues(int rM, int rD, FileTable ftD, int idxMeanA_FC, int idxMeanB_FC) { /* calcRunningMaxMinMeanABvalues */ /* Track the max and min of the MeanA and MeanB values */ if(rM<0 || rD<0 || ftD==null || rD>=ftD.tRows || idxMeanA_FC>=ftD.tCols || idxMeanB_FC>=ftD.tCols) return(false); String sMeanA= ftD.tData[rD][idxMeanA_FC], sMeanB= ftD.tData[rD][idxMeanB_FC]; float meanA= UtilCM.cvs2f(sMeanA,0), meanB= UtilCM.cvs2f(sMeanB,0); maxMeanA[rM]= Math.max(meanA,maxMeanA[rM]); minMeanA[rM]= Math.min(meanA,minMeanA[rM]); maxMeanB[rM]= Math.max(meanB,maxMeanB[rM]); minMeanB[rM]= Math.min(meanB,minMeanB[rM]); return(true); } /* calcRunningMaxMinMeanABvalues */ /** * getFilterSummaryHTML() - get an HTML summary of the test/data filters used. * This adds additional documentation based on which options were * selected. 
* @return summary else ""; */ public String getFilterSummaryHTML(String baseFileName) { /* getFilterSummaryHTML */ String summary= ""; if(filterTestField!=null) { summary += "The Tests-Intersection was filtered by tests matching: "; for(int i=0;i' data matching: "; for(int i=0;i'.\n"; summary += "Fold-change (FC) magnitudes less than the "+ "threshold fold-changes are indicated by blank cells "+ "in the table. The 'Range FC' values are computed as " + "('Max FC' - 'Min FC').\n"; /* If limiting the number of rows in the TI Table by deleting any rows * beyond 'maxNbrRows' then report what the current and previous table * sizes are. Set by '-limitMaxTableRows:{maxNbrRows,sortFirstByColName, * 'A'scending or 'D'escending}' switch. */ if(this.testsSizeLimitedFlag) { int nFullRows= ftTI.tDataFull.length; summary += "The Tests-Intersections results were limited to the "+ cvt.limitMaxTableRows+ " probe fold-change entries with the " + "highest magnitudes. The full table with "+nFullRows+ " rows is available as an "+ "Excel file.\n"; } /* If adding the ('Range Mean A', 'Range Mean B', 'FC counts %') * presented as log2 values computations to an expanded TI table. * Set by the '-addRangeOfMeans' switch. */ if(cvt.addRangeOfMeansToTItableFlag) { summary += "The ('Range Mean A', 'Range Mean B', 'FC counts %') data " + "are computed for each probe. The 'Range Means' are computed " + "for 'A Mean' and 'B Mean' in the gene expression data and are " + "presented as log2 values computed as " + "{max('A Mean')-min('A Mean')} and {max('B Mean')-min('B Mean')}." + "This is useful when looking at FC since high FC values with low " + "means are less meaningful. The 'FC counts %' are the percent " + "of tests where the FC exceeded the FC threshold.\n"; } summary += "\n
";
    return(summary);
  } /* getFilterSummaryHTML */


  /* -------------- class TestDataForRelDir (tdfrd instance) ------------- */

  /**
   * Holder for the parallel lists of per-test row data that are computed
   * as a side effect when calling getAllTestDataForRelDir(). All fields
   * start out at their Java defaults (0 or null).
   */
  class TestDataForRelDir
  {
    /* ------------- lists of data for the matching tests ------------- */

    /** Number of entries used in the xxxForRelDir[] lists of this
     * class, i.e. the data is in [0:nTestDataForRelDir-1].
     */
    public int nTestDataForRelDir;

    /** Rows found when calling getAllTestDataForRelDir(). */
    public int[] rowListForRelDir;

    /** Expression Groups for the rows found when calling
     * getAllTestDataForRelDir().
     */
    public String[] EGlistForRelDir;

    /** EG summary titles for the Expression Groups of the rows found
     * when calling getAllTestDataForRelDir(). An entry either holds a
     * single EG name, in which case it is the same as the
     * EGlistForRelDir[] entry, or it holds "EGa and EGb" if the A and B
     * classes are different.
     */
    public String[] EGSummaryTitleForRelDir;

    /** Class A samples for the rows found when calling
     * getAllTestDataForRelDir().
     */
    public String[] classAforRelDir;

    /** Class B samples for the rows found when calling
     * getAllTestDataForRelDir().
     */
    public String[] classBforRelDir;

    /** Test names for the rows found when calling
     * getAllTestDataForRelDir().
     */
    public String[] testNameListForRelDir;

    /** Page labels for the rows found when calling
     * getAllTestDataForRelDir().
     */
    public String[] pageLabelListForRelDir;

    /** Page descriptions for the rows found when calling
     * getAllTestDataForRelDir().
     */
    public String[] pageDescrForRelDir;

    /* ----------------- list of unique EGs ------------------ */

    /** Unique Expression Groups found when calling
     * getAllTestDataForRelDir(). Size [0:nUniqueEGgroups-1].
     */
    public String[] uniqueEGgroupsList;

    /** Number of unique Expression Groups, i.e. the data is in
     * uniqueEGgroupsList[0:nUniqueEGgroups-1].
     */
    public int nUniqueEGgroups;


    /**
     * TestDataForRelDir() - Constructor. Leaves every field at its
     * default value.
     */
    public TestDataForRelDir()
    { /* TestDataForRelDir */
    } /* TestDataForRelDir */

  } /* end of class TestDataForRelDir */


} /* end of class MakeTestsIntersectionTbl */