import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.BranchColor;
+import org.forester.phylogeny.data.NodeDataField;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.phylogeny.iterators.PreorderTreeIterator;
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
-import org.forester.util.SequenceIdParser;
+import org.forester.util.SequenceAccessionTools;
+import org.forester.util.StringInt;
import org.forester.ws.seqdb.UniProtTaxonomy;
public class TreePanelUtil {
final Configuration conf,
final TreePanel tp ) {
String uri_str = null;
- final String upkb = ForesterUtil.extractUniProtKbProteinSeqIdentifier( node );
+ final String upkb = SequenceAccessionTools.obtainUniProtAccessorFromDataFields( node );
if ( !ForesterUtil.isEmpty( upkb ) ) {
try {
- uri_str = ForesterUtil.UNIPROT_KB + URLEncoder.encode( upkb, ForesterConstants.UTF8 );
+ uri_str = ForesterUtil.UNIPROT_KB + URLEncoder.encode( upkb, ForesterConstants.UTF_8 );
}
catch ( final UnsupportedEncodingException e ) {
AptxUtil.showErrorMessage( tp, e.toString() );
}
}
if ( ForesterUtil.isEmpty( uri_str ) ) {
- final String v = ForesterUtil.extractGenbankAccessor( node );
+ final String v = SequenceAccessionTools.obtainGenbankAccessorFromDataFields( node );
if ( !ForesterUtil.isEmpty( v ) ) {
try {
- if ( SequenceIdParser.isProtein( v ) ) {
- uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF8 );
+ if ( SequenceAccessionTools.isProteinDbQuery( v ) ) {
+ uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF_8 );
}
else {
- uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF8 );
+ uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF_8 );
}
}
catch ( final UnsupportedEncodingException e ) {
}
}
if ( ForesterUtil.isEmpty( uri_str ) ) {
- final String v = ForesterUtil.extractRefSeqAccessorAccessor( node );
+ final String v = SequenceAccessionTools.obtainRefSeqAccessorFromDataFields( node );
if ( !ForesterUtil.isEmpty( v ) ) {
try {
- if ( SequenceIdParser.isProtein( v ) ) {
- uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF8 );
+ if ( SequenceAccessionTools.isProteinDbQuery( v ) ) {
+ uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF_8 );
}
else {
- uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF8 );
+ uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF_8 );
}
}
catch ( final UnsupportedEncodingException e ) {
}
}
if ( ForesterUtil.isEmpty( uri_str ) ) {
- final String v = ForesterUtil.extractGInumber( node );
+ final String v = SequenceAccessionTools.obtainGiNumberFromDataFields( node );
if ( !ForesterUtil.isEmpty( v ) ) {
try {
- uri_str = ForesterUtil.NCBI_GI + URLEncoder.encode( v, ForesterConstants.UTF8 );
+ uri_str = ForesterUtil.NCBI_GI + URLEncoder.encode( v, ForesterConstants.UTF_8 );
}
catch ( final UnsupportedEncodingException e ) {
AptxUtil.showErrorMessage( tp, e.toString() );
return uri_str;
}
+ /**
+  * Builds one web URI per PDB accession by appending the URL-encoded
+  * accession value to {@code ForesterUtil.PDB}.
+  * <p>
+  * Entries that are {@code null} or whose value is empty are skipped.
+  * The value is URL-encoded, in the same way as the sequence accessors
+  * handled elsewhere in this class, so that unexpected characters in
+  * tree data cannot produce a malformed URI.
+  *
+  * @param node the node the accessions belong to (not used by this method)
+  * @param pdb_accs the PDB accessions to create URIs for
+  * @param configuration the current configuration (not used by this method)
+  * @param treePanel passed to the error dialog if encoding fails
+  * @return a new list of URI strings, empty if no usable accession was given
+  */
+ public static List<String> createUrisForPdbWeb( final PhylogenyNode node,
+                                                 final List<Accession> pdb_accs,
+                                                 final Configuration configuration,
+                                                 final TreePanel treePanel ) {
+     final List<String> uris = new ArrayList<String>();
+     if ( ForesterUtil.isEmpty( pdb_accs ) ) {
+         return uris;
+     }
+     for( final Accession pdb_acc : pdb_accs ) {
+         if ( ( pdb_acc == null ) || ForesterUtil.isEmpty( pdb_acc.getValue() ) ) {
+             continue;
+         }
+         try {
+             uris.add( ForesterUtil.PDB + URLEncoder.encode( pdb_acc.getValue(), ForesterConstants.UTF_8 ) );
+         }
+         catch ( final UnsupportedEncodingException e ) {
+             // Same reporting as the other URI builders in this class.
+             AptxUtil.showErrorMessage( treePanel, e.toString() );
+         }
+     }
+     return uris;
+ }
+
/**
* Returns the set of distinct taxonomies of
* all external nodes of node.
* If at least one of the external nodes has no taxonomy,
* null is returned.
- *
+ *
*/
public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = node.getAllExternalDescendants();
if ( cp.isShowNodeNames() && !ForesterUtil.isEmpty( node.getName() ) ) {
TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getName(), sb );
}
- if ( cp.isShowGeneNames() && node.getNodeData().isHasSequence()
+ if ( cp.isShowSeqNames() && node.getNodeData().isHasSequence()
&& !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getName(), sb );
}
- if ( cp.isShowGeneSymbols() && node.getNodeData().isHasSequence()
+ if ( cp.isShowSeqSymbols() && node.getNodeData().isHasSequence()
&& !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) {
- TreePanelUtil
- .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getSymbol(), sb );
+ TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getSymbol(),
+ sb );
+ }
+ if ( cp.isShowGeneNames() && node.getNodeData().isHasSequence()
+ && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) {
+ TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence().getGeneName(),
+ sb );
}
if ( cp.isShowSequenceAcc() && node.getNodeData().isHasSequence()
&& ( node.getNodeData().getSequence().getAccession() != null )
}
if ( cp.isShowTaxonomyCode() && node.getNodeData().isHasTaxonomy()
&& !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
- TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy()
- .getTaxonomyCode(), sb );
+ TreePanelUtil
+ .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy().getTaxonomyCode(),
+ sb );
}
if ( cp.isShowTaxonomyScientificNames() && node.getNodeData().isHasTaxonomy()
&& !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
- TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy()
- .getScientificName(), sb );
+ TreePanelUtil
+ .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy().getScientificName(),
+ sb );
}
if ( cp.isShowTaxonomyCommonNames() && node.getNodeData().isHasTaxonomy()
&& !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getCommonName() ) ) {
- TreePanelUtil
- .showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy().getCommonName(), sb );
- }
- if ( ( cp.isShowGeneNames() || cp.isShowGeneSymbols() || cp.isShowSequenceAcc() )
- && node.getNodeData().isHasSequence()
- && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
- TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence()
- .getMolecularSequence(), sb );
- }
+ TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getTaxonomy().getCommonName(),
+ sb );
+ }
+ // if ( ( cp.isShowSeqNames() || cp.isShowSeqSymbols() || cp.isShowSequenceAcc() )
+ // && node.getNodeData().isHasSequence()
+ // && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
+ // TreePanelUtil.showExtDescNodeDataUserSelectedHelperHelper( node.getNodeData().getSequence()
+ // .getMolecularSequence(), sb );
+ // }
final String s = sb.toString().trim();
if ( !ForesterUtil.isEmpty( s ) ) {
data.add( s );
JOptionPane.showMessageDialog( parent, msg, title, JOptionPane.INFORMATION_MESSAGE );
}
- final static Color calculateColorFromString( final String str, final boolean is_taxonomy ) {
- final String my_str = str.toUpperCase();
- char first = my_str.charAt( 0 );
- char second = ' ';
- char third = ' ';
- if ( my_str.length() > 1 ) {
- if ( is_taxonomy ) {
- second = my_str.charAt( 1 );
- }
- else {
- second = my_str.charAt( my_str.length() - 1 );
- }
- if ( is_taxonomy ) {
- if ( my_str.length() > 2 ) {
- if ( my_str.indexOf( " " ) > 0 ) {
- third = my_str.charAt( my_str.indexOf( " " ) + 1 );
- }
- else {
- third = my_str.charAt( 2 );
- }
- }
- }
- else if ( my_str.length() > 2 ) {
- third = my_str.charAt( ( my_str.length() - 1 ) / 2 );
- }
- }
- first = TreePanelUtil.normalizeCharForRGB( first );
- second = TreePanelUtil.normalizeCharForRGB( second );
- third = TreePanelUtil.normalizeCharForRGB( third );
- if ( ( first > 235 ) && ( second > 235 ) && ( third > 235 ) ) {
- first = 0;
- }
- else if ( ( first < 60 ) && ( second < 60 ) && ( third < 60 ) ) {
- second = 255;
- }
- return new Color( first, second, third );
- }
-
final static void collapseSpeciesSpecificSubtrees( final Phylogeny phy ) {
boolean inferred = false;
+ for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+ iter.next().setCollapse( false );
+ }
for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
final PhylogenyNode n = it.next();
if ( !n.isExternal() && !n.isCollapse() && ( n.getNumberOfDescendants() > 1 ) ) {
}
}
+ /**
+  * Clears the collapse flag on the given node and, unless the node is
+  * external, on every node visited by a preorder iteration started at it.
+  *
+  * @param node the root of the subtree to uncollapse
+  */
+ final static void uncollapseSubtree( final PhylogenyNode node ) {
+     node.setCollapse( false );
+     if ( !node.isExternal() ) {
+         for( final PhylogenyNodeIterator iter = new PreorderTreeIterator( node ); iter.hasNext(); ) {
+             iter.next().setCollapse( false );
+         }
+     }
+ }
+
static void colorizeSubtree( final PhylogenyNode node, final BranchColor c ) {
node.getBranchData().setBranchColor( c );
final List<PhylogenyNode> descs = PhylogenyMethods.getAllDescendants( node );
if ( !n.getBranchData().isHasBranchColor() ) {
final Taxonomy tax = PhylogenyMethods.getExternalDescendantsTaxonomy( n );
if ( tax != null ) {
- n.getBranchData().setBranchColor( new BranchColor( tree_panel.calculateTaxonomyBasedColor( tax ) ) );
+ n.getBranchData()
+ .setBranchColor( new BranchColor( tree_panel.calculateTaxonomyBasedColor( tax ) ) );
final List<PhylogenyNode> descs = PhylogenyMethods.getAllDescendants( n );
for( final PhylogenyNode desc : descs ) {
desc.getBranchData()
}
}
- final static int colorPhylogenyAccordingToRanks( final Phylogeny tree, final String rank, final TreePanel tree_panel ) {
+// final static int collapseByTaxonomicRank( final Phylogeny tree, final String rank, final TreePanel tree_panel ) {
+// final Set<String> true_lineage_set = new HashSet<String>();
+// for( final PhylogenyNodeIterator iter = tree.iteratorPreorder(); iter.hasNext(); ) {
+// iter.next().setCollapse( false );
+// }
+// int collapsed = 0;
+// for( final PhylogenyNodeIterator it = tree.iteratorPostorder(); it.hasNext(); ) {
+// final PhylogenyNode n = it.next();
+// if ( !n.isExternal() && n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() )
+// && n.getNodeData().getTaxonomy().getRank().equalsIgnoreCase( rank ) /*&& !n.isRoot()*/ ) {
+// TreePanelUtil.collapseSubtree( n, true );
+// ++collapsed;
+// if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
+// true_lineage_set.add( n.getNodeData().getTaxonomy().getScientificName() );
+// }
+// }
+// }
+// for( final PhylogenyNodeIterator it = tree.iteratorPostorder(); it.hasNext(); ) {
+// final PhylogenyNode node = it.next();
+// if ( ( !node.isExternal() && !node.isCollapse() ) && node.getNodeData().isHasTaxonomy()
+// && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getLineage() ) /* && !node.isRoot()*/ ) {
+// boolean success = false;
+// if ( !true_lineage_set.isEmpty() ) {
+// for( final String lin : node.getNodeData().getTaxonomy().getLineage() ) {
+// if ( true_lineage_set.contains( lin ) ) {
+// TreePanelUtil.collapseSubtree( node, true );
+// ++collapsed;
+// success = true;
+// break;
+// }
+// }
+// }
+// if ( !success ) {
+// final Map<String, String> lineage_to_rank_map = MainPanel.getLineageToRankMap();
+// for( final String lin : node.getNodeData().getTaxonomy().getLineage() ) {
+// final Taxonomy temp_tax = new Taxonomy();
+// temp_tax.setScientificName( lin );
+// if ( lineage_to_rank_map.containsKey( lin )
+// && !ForesterUtil.isEmpty( lineage_to_rank_map.get( lin ) )
+// && lineage_to_rank_map.get( lin ).equalsIgnoreCase( rank ) ) {
+// TreePanelUtil.collapseSubtree( node, true );
+// ++collapsed;
+// true_lineage_set.add( lin );
+// break;
+// }
+// else {
+// UniProtTaxonomy up = null;
+// try {
+// up = TaxonomyDataManager.obtainUniProtTaxonomy( temp_tax, null, null );
+// }
+// catch ( final Exception e ) {
+// e.printStackTrace();
+// }
+// if ( ( up != null ) && !ForesterUtil.isEmpty( up.getRank() ) ) {
+// lineage_to_rank_map.put( lin, up.getRank() );
+// System.out.println( lin + "->" + up.getRank() );
+// if ( up.getRank().equalsIgnoreCase( rank ) ) {
+// TreePanelUtil.collapseSubtree( node, true );
+// ++collapsed;
+// true_lineage_set.add( lin );
+// break;
+// }
+// }
+// }
+// }
+// }
+// }
+// }
+// return collapsed;
+// }
+
+ final static int colorPhylogenyAccordingToRanks( final Phylogeny tree,
+ final String rank,
+ final TreePanel tree_panel ) {
final Map<String, Color> true_lineage_to_color_map = new HashMap<String, Color>();
int colorizations = 0;
for( final PhylogenyNodeIterator it = tree.iteratorPostorder(); it.hasNext(); ) {
final PhylogenyNode n = it.next();
if ( n.getNodeData().isHasTaxonomy()
&& ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() )
- || !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) || !ForesterUtil
- .isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ || !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() )
+ || !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getRank() )
&& n.getNodeData().getTaxonomy().getRank().equalsIgnoreCase( rank ) ) {
- final BranchColor c = new BranchColor( tree_panel.calculateTaxonomyBasedColor( n.getNodeData()
- .getTaxonomy() ) );
+ final BranchColor c = new BranchColor( tree_panel
+ .calculateTaxonomyBasedColor( n.getNodeData().getTaxonomy() ) );
TreePanelUtil.colorizeSubtree( n, c );
++colorizations;
if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
- true_lineage_to_color_map.put( n.getNodeData().getTaxonomy().getScientificName(), c.getValue() );
+ true_lineage_to_color_map.put( n.getNodeData().getTaxonomy().getScientificName(),
+ c.getValue() );
}
}
}
if ( !true_lineage_to_color_map.isEmpty() ) {
for( final String lin : node.getNodeData().getTaxonomy().getLineage() ) {
if ( true_lineage_to_color_map.containsKey( lin ) ) {
- TreePanelUtil
- .colorizeSubtree( node, new BranchColor( true_lineage_to_color_map.get( lin ) ) );
+ TreePanelUtil.colorizeSubtree( node,
+ new BranchColor( true_lineage_to_color_map.get( lin ) ) );
++colorizations;
success = true;
break;
}
if ( ( up != null ) && !ForesterUtil.isEmpty( up.getRank() ) ) {
lineage_to_rank_map.put( lin, up.getRank() );
+ System.out.println( lin + "->" + up.getRank() );
if ( up.getRank().equalsIgnoreCase( rank ) ) {
- final BranchColor c = new BranchColor( tree_panel.calculateTaxonomyBasedColor( temp_tax ) );
+ final BranchColor c = new BranchColor( tree_panel
+ .calculateTaxonomyBasedColor( temp_tax ) );
TreePanelUtil.colorizeSubtree( node, c );
++colorizations;
true_lineage_to_color_map.put( lin, c.getValue() );
return colorizations;
}
- final static String createAnnotationString( final SortedSet<Annotation> annotations, final boolean show_ref_sources ) {
+ final static String createAnnotationString( final SortedSet<Annotation> annotations,
+ final boolean show_ref_sources ) {
final SortedMap<String, List<Annotation>> m = new TreeMap<String, List<Annotation>>();
for( final Annotation an : annotations ) {
final String ref_source = ForesterUtil.isEmpty( an.getRefSource() ) ? "?" : an.getRefSource();
if ( sb.charAt( sb.length() - 1 ) == ' ' ) {
sb.deleteCharAt( sb.length() - 1 );
}
- if ( i < ans.size() - 1 ) {
+ if ( i < ( ans.size() - 1 ) ) {
sb.append( ", " );
}
}
/**
 * Tells whether a sequence carries no identifying data: it has no
 * accession and its name, gene name and symbol are all empty.
 *
 * @param seq the sequence to inspect
 * @return true if none of the checked fields is set
 */
final static boolean isSequenceEmpty( final Sequence seq ) {
return ( seq.getAccession() == null ) && ForesterUtil.isEmpty( seq.getName() )
- && ForesterUtil.isEmpty( seq.getSymbol() );
+ && ForesterUtil.isEmpty( seq.getGeneName() ) && ForesterUtil.isEmpty( seq.getSymbol() );
}
/**
 * Tells whether a taxonomy carries no identifying data: it has no
 * identifier, its taxonomy code, common name and scientific name are
 * all empty, and its synonym collection is empty.
 *
 * @param tax the taxonomy to inspect
 * @return true if none of the checked fields is set
 */
final static boolean isTaxonomyEmpty( final Taxonomy tax ) {
return ( ( tax.getIdentifier() == null ) && ForesterUtil.isEmpty( tax.getTaxonomyCode() )
- && ForesterUtil.isEmpty( tax.getCommonName() ) && ForesterUtil.isEmpty( tax.getScientificName() ) && tax
- .getSynonyms().isEmpty() );
+ && ForesterUtil.isEmpty( tax.getCommonName() ) && ForesterUtil.isEmpty( tax.getScientificName() )
+ && tax.getSynonyms().isEmpty() );
+ }
+
+ /**
+  * Writes the collected node data into {@code sb}, one entry per line
+  * (terminated by {@code ForesterUtil.LINE_SEPARATOR}), and returns the
+  * number of lines written.
+  * <p>
+  * When the selected field is {@code SEQUENCE_MOL_SEQ_FASTA} or
+  * {@code GO_TERM_IDS}, every non-empty entry is written unchanged in the
+  * order given. For every other field, equal entries are collapsed into a
+  * single "value TAB count" line. For {@code DOMAINS_ALL},
+  * {@code DOMAINS_COLLAPSED_PER_PROTEIN} and {@code SEQ_ANNOTATIONS} these
+  * lines are sorted with {@code StringInt.DescendingIntComparator};
+  * otherwise they follow the key order of the sorted map.
+  *
+  * @param data the raw entries; empty entries are ignored
+  * @param optz supplies the node data field being returned
+  * @param sb receives the formatted output
+  * @return the number of lines appended to {@code sb}
+  */
+ static final int nodeDataIntoStringBuffer( final List<String> data, final Options optz, final StringBuilder sb ) {
+     final NodeDataField field = optz.getExtDescNodeDataToReturn();
+     if ( ( field == NodeDataField.SEQUENCE_MOL_SEQ_FASTA ) || ( field == NodeDataField.GO_TERM_IDS ) ) {
+         // Count only what is actually appended, so the result agrees with
+         // the output even if data contains empty entries.
+         int written = 0;
+         for( final String d : data ) {
+             if ( !ForesterUtil.isEmpty( d ) ) {
+                 sb.append( d );
+                 sb.append( ForesterUtil.LINE_SEPARATOR );
+                 ++written;
+             }
+         }
+         return written;
+     }
+     // Tally how often each distinct non-empty entry occurs.
+     final SortedMap<String, Integer> counts = new TreeMap<String, Integer>();
+     for( final String d : data ) {
+         if ( !ForesterUtil.isEmpty( d ) ) {
+             final Integer seen = counts.get( d );
+             counts.put( d, seen == null ? 1 : seen + 1 );
+         }
+     }
+     if ( ( field == NodeDataField.DOMAINS_ALL ) || ( field == NodeDataField.DOMAINS_COLLAPSED_PER_PROTEIN )
+             || ( field == NodeDataField.SEQ_ANNOTATIONS ) ) {
+         final List<StringInt> sis = new ArrayList<StringInt>();
+         for( final Entry<String, Integer> e : counts.entrySet() ) {
+             sis.add( new StringInt( e.getKey(), e.getValue() ) );
+         }
+         Collections.sort( sis, new StringInt.DescendingIntComparator() );
+         for( final StringInt si : sis ) {
+             sb.append( si.getString() );
+             sb.append( "\t" );
+             sb.append( si.getInt() );
+             sb.append( ForesterUtil.LINE_SEPARATOR );
+         }
+     }
+     else {
+         for( final Entry<String, Integer> e : counts.entrySet() ) {
+             sb.append( e.getKey() );
+             sb.append( "\t" );
+             sb.append( e.getValue() );
+             sb.append( ForesterUtil.LINE_SEPARATOR );
+         }
+     }
+     return counts.size();
}
- final static char normalizeCharForRGB( char c ) {
- c -= 65;
- c *= 10.2;
- c = c > 255 ? 255 : c;
- c = c < 0 ? 0 : c;
- return c;
+ /**
+  * Formats the PDB accession at position {@code i} of {@code accs}: the
+  * accession value alone when its comment is empty, otherwise the value
+  * followed by the lower-cased comment in parentheses.
+  *
+  * @param accs the list of accessions
+  * @param i index of the accession to format
+  * @return the display string for the selected accession
+  */
+ final static String pdbAccToString( final List<Accession> accs, final int i ) {
+     final Accession acc = accs.get( i );
+     final String comment = acc.getComment();
+     final String value = acc.getValue();
+     return ForesterUtil.isEmpty( comment ) ? value : value + " (" + comment.toLowerCase() + ")";
}
final static Phylogeny subTree( final PhylogenyNode new_root, final Phylogeny source_phy ) {