in progress...
author cmzmasek <chris.zma@outlook.com>
Mon, 21 Aug 2017 18:56:31 +0000 (11:56 -0700)
committer cmzmasek <chris.zma@outlook.com>
Mon, 21 Aug 2017 18:56:31 +0000 (11:56 -0700)
forester/.classpath
forester/.gitignore
forester/java/forester.jar
forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java
forester/java/src/org/forester/clade_analysis/Result2.java
forester/perl/phylo_pl.pl
forester/ruby/evoruby/lib/evo/tool/domains_to_forester.rb
forester/ruby/evoruby/lib/evo/tool/phylogenies_decorator.rb
forester/ruby/evoruby/lib/evo/util/constants.rb

index 4437209..baf7e77 100644 (file)
@@ -2,8 +2,8 @@
 <classpath>
        <classpathentry kind="src" path="java/src"/>
        <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
-       <classpathentry kind="lib" path="java/resources/itextpdf-5.1.0.jar"/>
        <classpathentry kind="lib" path="java/resources/commons-codec-1.5.jar"/>
        <classpathentry kind="lib" path="java/resources/openchart.jar"/>
+       <classpathentry kind="lib" path="/home/zma/git/forester/forester/java/resources/itextpdf-5.5.9.jar"/>
        <classpathentry kind="output" path="bin"/>
 </classpath>
index ae3c172..7b24670 100644 (file)
@@ -1 +1,12 @@
-/bin/
+.metadata
+bin/
+tmp/
+java/classes/
+*.class
+*.tmp
+*.bak
+*.swp
+local.properties
+.project
+
+
index 399915a..bbb00f9 100644 (file)
Binary files a/forester/java/forester.jar and b/forester/java/forester.jar differ
index 207b57c..5a5afea 100644 (file)
@@ -32,7 +32,15 @@ public class CladeAnalysisTest {
             failed = true;
         }
         if ( !testCladeAnalysis4() ) {
-            System.out.println( "Clade analysis 3 failed" );
+            System.out.println( "Clade analysis 4 failed" );
+            failed = true;
+        }
+        if ( !testCladeAnalysis5() ) {
+            System.out.println( "Clade analysis 5 failed" );
+            failed = true;
+        }
+        if ( !testCladeAnalysis6() ) {
+            System.out.println( "Clade analysis 6 failed" );
             failed = true;
         }
         if ( !failed ) {
@@ -53,6 +61,12 @@ public class CladeAnalysisTest {
         if ( !testCladeAnalysis4() ) {
             return false;
         }
+        if ( !testCladeAnalysis5() ) {
+            return false;
+        }
+        if ( !testCladeAnalysis6() ) {
+            return false;
+        }
         return true;
     }
 
@@ -761,4 +775,88 @@ public class CladeAnalysisTest {
         }
         return true;
     }
+    
+    private static boolean testCladeAnalysis5() {
+        try {
+            final File intreefile1 = new File( PATH_TO_TEST_DATA + "clade_analysis_test_3.xml" );
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( intreefile1, true );
+            final Phylogeny p1 = factory.create( intreefile1, pp )[ 0 ];
+            Pattern query = Pattern.compile(".+#\\d+_M=(.+)");
+            Result2 res = Analysis2.execute( p1, query, "." );
+            
+            res.analyzeGreatestCommonPrefixes( 0.3 );
+            System.out.print( res.toString());
+            System.out.println( "------------------------- ");
+            System.out.println();
+            
+           // Result res = Analysis.execute( p1, "A.1.1.1", "." );
+           /* if ( !res.getGreatestCommonPrefix().equals( "A.1" ) ) {
+                return false;
+            }
+            if ( !res.getGreatestCommonPrefixDown().equals( "A.1.1" ) ) {
+                return false;
+            }
+            if ( !res.getGreatestCommonPrefixUp().equals( "A.1.2.1" ) ) {
+                return false;
+            }
+            if ( res.getLeastEncompassingCladeSize() != 4 ) {
+                return false;
+            }
+            if ( res.getTreeSize() != 25 ) {
+                return false;
+            }
+            if ( res.getWarnings().size() != 0 ) {
+                return false;
+            }*/
+          
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+    
+    private static boolean testCladeAnalysis6() {
+        try {
+            final File intreefile1 = new File( PATH_TO_TEST_DATA + "clade_analysis_test_4.xml" );
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( intreefile1, true );
+            final Phylogeny p1 = factory.create( intreefile1, pp )[ 0 ];
+            Pattern query = Pattern.compile(".+#\\d+_M=(.+)");
+            Result2 res = Analysis2.execute( p1, query, "." );
+            
+            res.analyzeGreatestCommonPrefixes( 0.45 );
+            System.out.print( res.toString());
+            System.out.println( "------------------------- ");
+            System.out.println();
+            
+           // Result res = Analysis.execute( p1, "A.1.1.1", "." );
+           /* if ( !res.getGreatestCommonPrefix().equals( "A.1" ) ) {
+                return false;
+            }
+            if ( !res.getGreatestCommonPrefixDown().equals( "A.1.1" ) ) {
+                return false;
+            }
+            if ( !res.getGreatestCommonPrefixUp().equals( "A.1.2.1" ) ) {
+                return false;
+            }
+            if ( res.getLeastEncompassingCladeSize() != 4 ) {
+                return false;
+            }
+            if ( res.getTreeSize() != 25 ) {
+                return false;
+            }
+            if ( res.getWarnings().size() != 0 ) {
+                return false;
+            }*/
+          
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
 }
index 2bd4911..1cfc3ff 100644 (file)
@@ -258,8 +258,7 @@ public final class Result2 {
 
             @Override
             public int compare( final Prefix x, final Prefix y ) {
-                final int start_comparison = compare( x.getConfidence(), y.getConfidence() );
-                return start_comparison;
+                return compare( x.getConfidence(), y.getConfidence() );
             }
 
             private int compare( final double a, final double b ) {
index 39d6e20..77c603d 100755 (executable)
@@ -762,13 +762,13 @@ if ( $use_raxml == 1 ) {
     }
     
     # NOTE. RaxML does its own bootstrapping.
-    &executeRaxml( "align", $RAXML_MODEL_BASE.$invar.$model."F", $bootstraps, $seed, "xxx", $RAXML_ALGORITHM );
+    &executeRaxml( "align", $RAXML_MODEL_BASE.$invar.$model."X", $bootstraps, $seed, "xxx", $RAXML_ALGORITHM );
     print( "\n========== RAxML end =========\n\n" );
     
     &rm( "RAxML_log.xxx" );
     &rm( "RAxML_parsimonyTree.xxx" );
     &mv( "RAxML_info.xxx", $outfile."_raxml_info" );
-    if ( $bootstraps > 1 ) {
+   # if ( $bootstraps > 1 ) {
         &rm( "RAxML_bestTree.xxx" );
         &mv( "RAxML_bipartitions.xxx", $CONSENSUS_RAXML );
         &append( "RAxML_bootstrap.xxx", $OUTTREES_ALL );
@@ -779,10 +779,10 @@ if ( $use_raxml == 1 ) {
             &rm( "RAxML_bootstrap.xxx" );
         }
         $all_count++;
-    }
-    else {
-        &mv( "RAxML_result.xxx", $OUTTREE_RAXML );
-    }
+  #  }
+  #  else {
+  #      &mv( "RAxML_result.xxx", $OUTTREE_RAXML );
+  #  }
 }
 
 
@@ -1226,7 +1226,8 @@ else {
         &to_phyloxml( $OUTTREE_WEIGHBOR, $weighbor_outtree, 0, 1 );
     }
     if ( $use_raxml == 1 ) {
-        &to_phyloxml( $OUTTREE_RAXML, $raxml_outtree, 0, 1 );
+      #  &to_phyloxml( $OUTTREE_RAXML, $raxml_outtree, 0, 1 );
+           &to_phyloxml( $CONSENSUS_RAXML, $raxml_outtree, 1, 1 );
     }
     if ( $use_phyml == 1 ) {
         &to_phyloxml( $OUTTREE_PHYML, $phyml_outtree, 0, 1 );
@@ -1324,7 +1325,7 @@ chdir( $current_dir )
 rmdir( $temp_dir )
 || print "\n\n$0: Warning: Could not remove <<$temp_dir>>: $!\n\n";
 
-print "\n\n\n$0 successfully comleted.\n\n";
+print "\n\n\n$0 successfully completed.\n\n";
 
 exit( 0 ); 
     
@@ -1350,8 +1351,10 @@ sub executeRaxml {
     my $outfile_suffix = $_[ 4 ];
     my $algo           = $_[ 5 ];
     
+    $replicates = 100;
+    
     &testForTextFilePresence( $msa );
-    my $command = "$RAXML -m $model -s $msa -n $outfile_suffix";
+    my $command = "$RAXML -p 27 -m $model -s $msa -n $outfile_suffix";
       
     if ( $replicates > 1 ) {
         $command = $command . " -x $seed -N $replicates";
@@ -1444,7 +1447,7 @@ sub dieIfFileExists {
 sub dieIfFileNotExists {
     my $file = $_[ 0 ]; 
     unless ( ( -s $file ) && ( -f $file ) ) {
-        die( "\n\n$0: \"$file\" does not exist or is empty" );
+       die( "\n\n$0: \"$file\" does not exist or is empty" );
     }
 } 
 
index a090d52..51398ed 100644 (file)
@@ -184,9 +184,12 @@ module Evoruby
           original_sequences_file = cla.get_file_name( 1 )
         else
           hmmscan_index = domains_list_file.index(Constants::HMMSCAN)
+          puts domains_list_file
           if ( hmmscan_index != nil )
             prefix = domains_list_file[0 .. hmmscan_index-1 ]
+            puts prefix
             suffix = Constants::ID_NORMALIZED_FASTA_FILE_SUFFIX
+            puts suffix
             files = Dir.entries( "." )
             matching_files = Util.get_matching_files( files, prefix, suffix)
             if matching_files.length < 1
index b2f6446..3b51469 100644 (file)
@@ -22,7 +22,7 @@ module Evoruby
     DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -mp -or'
     DECORATOR_OPTIONS_DOMAINS   = '-p -t'
     SLEEP_TIME                  = 0.01
-    REMOVE_NI                   = true
+    REMOVE_NI                   = false
     TMP_FILE_1                  = '___PD1___'
     TMP_FILE_2                  = '___PD2___'
     LOG_FILE                    = '00_phylogenies_decorator.log'
index c9e15f9..4866f69 100644 (file)
@@ -14,7 +14,7 @@ module Evoruby
     ID_NORMALIZED_FASTA_FILE_SUFFIX          = "_ni.fasta"
     ID_MAP_FILE_SUFFIX                       = ".nim"
     DOMAIN_TABLE_SUFFIX                      = "_domain_table"
-    HMMSCAN                                  = "_hmmscan_"
+    HMMSCAN                                  = "_hmmscan"
     DOMAINS_TO_FORESTER_OUTFILE_SUFFIX       = ".dff"
     DOMAINS_TO_FORESTER_EVALUE_CUTOFF_SUFFIX = "_dtfE"