inprogress
[jalview.git] / forester / java / src / org / forester / phylogeny / PhylogenyMethods.java
index e8498c5..08fe7f0 100644 (file)
@@ -44,6 +44,8 @@ import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
 import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
 import org.forester.phylogeny.data.BranchColor;
 import org.forester.phylogeny.data.BranchWidth;
 import org.forester.phylogeny.data.Confidence;
@@ -266,6 +268,18 @@ public class PhylogenyMethods {
         return stats;
     }
 
+    /**
+     * Re-attaches every external descendant of <code>n</code> to <code>n</code>
+     * itself via <code>n.setChildNode( i, ... )</code>, giving each one a
+     * distance to parent equal to the value returned by
+     * {@link #calculateDistanceToAncestor(PhylogenyNode, PhylogenyNode)} for
+     * its original position.
+     * <p>
+     * NOTE(review): what <code>getAllExternalDescendants()</code> returns when
+     * <code>n</code> is itself external is not visible here -- confirm that
+     * <code>n</code> cannot end up being attached to itself.
+     *
+     * @param n the node whose external descendants are re-attached to it
+     */
+    public final static void collapseSubtreeStructure( final PhylogenyNode n ) {
+        final List<PhylogenyNode> eds = n.getAllExternalDescendants();
+        final List<Double> d = new ArrayList<Double>();
+        // First pass: record all distances while the original structure is
+        // still in place (presumably setChildNode changes parent links, which
+        // would alter the paths being measured -- TODO confirm).
+        for( final PhylogenyNode ed : eds ) {
+            d.add( calculateDistanceToAncestor( n, ed ) );
+        }
+        // Second pass: child slot i of n receives the i-th external descendant
+        // together with the distance recorded for it above.
+        for( int i = 0; i < eds.size(); ++i ) {
+            n.setChildNode( i, eds.get( i ) );
+            eds.get( i ).setDistanceToParent( d.get( i ) );
+        }
+    }
+
     public static int countNumberOfOneDescendantNodes( final Phylogeny phy ) {
         int count = 0;
         for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
@@ -297,9 +311,8 @@ public class PhylogenyMethods {
         return nodes;
     }
 
-    public static void deleteExternalNodesNegativeSelection( final Set<Integer> to_delete, final Phylogeny phy ) {
-        phy.clearHashIdToNodeMap();
-        for( final Integer id : to_delete ) {
+    public static void deleteExternalNodesNegativeSelection( final Set<Long> to_delete, final Phylogeny phy ) {
+        for( final Long id : to_delete ) {
             phy.deleteSubtree( phy.getNode( id ), true );
         }
         phy.clearHashIdToNodeMap();
@@ -327,24 +340,6 @@ public class PhylogenyMethods {
         p.externalNodesHaveChanged();
     }
 
-    public static void deleteExternalNodesPositiveSelection( final Set<Taxonomy> species_to_keep, final Phylogeny phy ) {
-        //   final Set<Integer> to_delete = new HashSet<Integer>();
-        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
-            final PhylogenyNode n = it.next();
-            if ( n.getNodeData().isHasTaxonomy() ) {
-                if ( !species_to_keep.contains( n.getNodeData().getTaxonomy() ) ) {
-                    //to_delete.add( n.getNodeId() );
-                    phy.deleteSubtree( n, true );
-                }
-            }
-            else {
-                throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
-            }
-        }
-        phy.clearHashIdToNodeMap();
-        phy.externalNodesHaveChanged();
-    }
-
     public static List<String> deleteExternalNodesPositiveSelection( final String[] node_names_to_keep,
                                                                      final Phylogeny p ) {
         final PhylogenyNodeIterator it = p.iteratorExternalForward();
@@ -367,6 +362,22 @@ public class PhylogenyMethods {
         return deleted;
     }
 
+    /**
+     * Deletes all external nodes of <code>phy</code> whose taxonomy is not
+     * contained in <code>species_to_keep</code>.
+     * <p>
+     * The ids of the nodes to remove are gathered first; the actual removal is
+     * delegated to
+     * {@link #deleteExternalNodesNegativeSelection(Set, Phylogeny)} after the
+     * traversal has finished, so nothing is deleted if the exception below is
+     * thrown.
+     *
+     * @param species_to_keep taxonomies whose external nodes are retained
+     *            (membership is tested with <code>List.contains</code>)
+     * @param phy the phylogeny to prune
+     * @throws IllegalArgumentException if an external node has no taxonomy
+     */
+    public static void deleteExternalNodesPositiveSelectionT( final List<Taxonomy> species_to_keep, final Phylogeny phy ) {
+        final Set<Long> ids_to_remove = new HashSet<Long>();
+        final PhylogenyNodeIterator ext_iter = phy.iteratorExternalForward();
+        while ( ext_iter.hasNext() ) {
+            final PhylogenyNode ext = ext_iter.next();
+            if ( !ext.getNodeData().isHasTaxonomy() ) {
+                throw new IllegalArgumentException( "node " + ext.getId() + " has no taxonomic data" );
+            }
+            if ( !species_to_keep.contains( ext.getNodeData().getTaxonomy() ) ) {
+                ids_to_remove.add( ext.getId() );
+            }
+        }
+        deleteExternalNodesNegativeSelection( ids_to_remove, phy );
+    }
+
     final public static void deleteInternalNodesWithOnlyOneDescendent( final Phylogeny phy ) {
         final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
         for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
@@ -909,6 +920,10 @@ public class PhylogenyMethods {
                 match = true;
             }
             if ( !match && node.getNodeData().isHasSequence()
+                    && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            if ( !match && node.getNodeData().isHasSequence()
                     && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
                 match = true;
             }
@@ -931,6 +946,38 @@ public class PhylogenyMethods {
                     }
                 }
             }
+            //
+            if ( !match && node.getNodeData().isHasSequence()
+                    && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+                for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+                    if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                    if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                }
+            }
+            if ( !match && node.getNodeData().isHasSequence()
+                    && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+                for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+                    if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                    if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                    if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                }
+            }
+            //
             if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
                 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
                 I: while ( it.hasNext() ) {
@@ -1009,6 +1056,10 @@ public class PhylogenyMethods {
                     match = true;
                 }
                 if ( !match && node.getNodeData().isHasSequence()
+                        && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                if ( !match && node.getNodeData().isHasSequence()
                         && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
                     match = true;
                 }
@@ -1031,6 +1082,38 @@ public class PhylogenyMethods {
                         }
                     }
                 }
+                //
+                if ( !match && node.getNodeData().isHasSequence()
+                        && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+                    for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+                        if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                        if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                    }
+                }
+                if ( !match && node.getNodeData().isHasSequence()
+                        && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+                    for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+                        if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                        if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                        if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                    }
+                }
+                //
                 if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
                     Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
                     I: while ( it.hasNext() ) {
@@ -1162,6 +1245,11 @@ public class PhylogenyMethods {
                         return n1.getNodeData().getSequence().getSymbol()
                                 .compareTo( n2.getNodeData().getSequence().getSymbol() );
                     }
+                    if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+                            && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+                        return n1.getNodeData().getSequence().getGeneName()
+                                .compareTo( n2.getNodeData().getSequence().getGeneName() );
+                    }
                     if ( ( n1.getNodeData().getSequence().getAccession() != null )
                             && ( n2.getNodeData().getSequence().getAccession() != null )
                             && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
@@ -1191,6 +1279,11 @@ public class PhylogenyMethods {
                         return n1.getNodeData().getSequence().getSymbol()
                                 .compareTo( n2.getNodeData().getSequence().getSymbol() );
                     }
+                    if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+                            && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+                        return n1.getNodeData().getSequence().getGeneName()
+                                .compareTo( n2.getNodeData().getSequence().getGeneName() );
+                    }
                     if ( ( n1.getNodeData().getSequence().getAccession() != null )
                             && ( n2.getNodeData().getSequence().getAccession() != null )
                             && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
@@ -1257,6 +1350,11 @@ public class PhylogenyMethods {
                         return n1.getNodeData().getSequence().getSymbol()
                                 .compareTo( n2.getNodeData().getSequence().getSymbol() );
                     }
+                    if ( ( !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getGeneName() ) )
+                            && ( !ForesterUtil.isEmpty( n2.getNodeData().getSequence().getGeneName() ) ) ) {
+                        return n1.getNodeData().getSequence().getGeneName()
+                                .compareTo( n2.getNodeData().getSequence().getGeneName() );
+                    }
                     if ( ( n1.getNodeData().getSequence().getAccession() != null )
                             && ( n2.getNodeData().getSequence().getAccession() != null )
                             && !ForesterUtil.isEmpty( n1.getNodeData().getSequence().getAccession().getValue() )
@@ -1481,6 +1579,24 @@ public class PhylogenyMethods {
         return PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
     }
 
+    /**
+     * Sums the branch lengths on the path from <code>desc</code> up to
+     * <code>anc</code>, walking the tree through <code>getParent()</code>.
+     * <p>
+     * Branches whose length equals
+     * <code>PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT</code> are left out of the
+     * sum. If every branch on the path has the default length (including the
+     * case <code>anc == desc</code>, where the path is empty), the default
+     * value itself is returned instead of 0.
+     *
+     * @param anc the ancestor node at which the walk stops (compared by
+     *            reference)
+     * @param desc the node at which the walk starts
+     * @return the summed non-default branch lengths, or
+     *         <code>PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT</code> if there
+     *         were none
+     * @throws IllegalArgumentException if the walk reaches <code>null</code>
+     *             before reaching <code>anc</code>, i.e. <code>desc</code> is
+     *             not located below <code>anc</code>
+     */
+    static double calculateDistanceToAncestor( final PhylogenyNode anc, PhylogenyNode desc ) {
+        double d = 0;
+        boolean all_default = true;
+        while ( anc != desc ) {
+            if ( desc == null ) {
+                // The parent chain ended without meeting anc (presumably
+                // getParent() yields null above the root -- TODO confirm).
+                // Report this clearly instead of dereferencing null below.
+                throw new IllegalArgumentException( "node is not a descendant of the given ancestor node" );
+            }
+            if ( desc.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
+                d += desc.getDistanceToParent();
+                all_default = false;
+            }
+            desc = desc.getParent();
+        }
+        return all_default ? PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT : d;
+    }
+
     /**
      * Deep copies the phylogeny originating from this node.
      */