2 package org.forester.application;
4 import java.io.BufferedReader;
6 import java.io.IOException;
7 import java.util.ArrayList;
8 import java.util.Iterator;
11 import java.util.Map.Entry;
13 import java.util.SortedMap;
14 import java.util.SortedSet;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
20 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
21 import org.forester.phylogeny.Phylogeny;
22 import org.forester.phylogeny.PhylogenyMethods;
23 import org.forester.phylogeny.PhylogenyNode;
24 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
25 import org.forester.phylogeny.factories.PhylogenyFactory;
26 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
27 import org.forester.util.CommandLineArguments;
28 import org.forester.util.ForesterUtil;
30 public class dom_dup {
33 // ARATH SOYBN VOLCA CYAME PARTE THAPS EMIHU NAEGR
// Names of the two command-line options that main tests with isOptionSet().
34 final static private String HELP_OPTION_1 = "help";
35 final static private String HELP_OPTION_2 = "h";
// Program name; used as the prefix argument of ForesterUtil.programMessage()
// and ForesterUtil.fatalError(), and printed in the usage text.
36 final static private String PRG_NAME = "dom_dup";
// NOTE(review): the remaining constants are not referenced in the visible
// part of this listing; presumably they are the program-information
// arguments handed to ForesterUtil.printProgramInformation() -- confirm.
// PRG_DESC is an empty string.
37 final static private String PRG_DESC = "";
38 final static private String PRG_VERSION = "0.90";
39 final static private String PRG_DATE = "2013.03.12";
40 final static private String E_MAIL = "phylosoft@gmail.com";
41 final static private String WWW = "sites.google.com/site/cmzmasek/home/software/forester";
/**
 * Program entry point.
 * <p>
 * Three command-line names are read: name 0 is compiled as a regular
 * expression, name 1 is the species-groups file and name 2 is the input tree
 * file. A help option, or any number of names other than three, satisfies
 * the first condition below.
 * <p>
 * The first phylogeny produced from the tree file (phyloXML parser with XSD
 * validation) is used. The two species sets are filled by read() and
 * printed, external node names are collected by obtainMatchingNames() and
 * grouped by obtainPairs(). For each group holding exactly two names the LCA
 * of the two nodes is calculated and the taxonomy code of every external
 * descendant of that LCA is tested against both species sets. A group
 * holding exactly one name is reported as having no partner. Four counters
 * are printed at the end.
 * <p>
 * NOTE(review): this listing is incomplete. The opening of the try block
 * that belongs to the catch at the bottom, the body of the first condition,
 * the declaration of lca_counter, every counter update, the bodies of the
 * set_a / set_b tests and all closing braces are not visible. Restore them
 * from the original file before relying on this description.
 *
 * @param args command-line arguments as described above
 */
43 public static void main( final String args[] ) {
45 final CommandLineArguments cla = new CommandLineArguments( args );
46 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( cla.getNumberOfNames() != 3 ) ) {
50 final String pattern_str = cla.getName( 0 );
// Index order differs from declaration order: the tree file is name 2 and
// the species-groups file is name 1.
51 final File intree_file = cla.getFile( 2 );
52 final File species_groups_file = cla.getFile( 1 );
53 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// Only element 0 of the array returned by the factory is used.
54 final Phylogeny phy = factory.create( intree_file, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ];
55 ForesterUtil.programMessage( PRG_NAME, "Pattern string: " + pattern_str );
56 final Pattern pattern = Pattern.compile( pattern_str );
57 ForesterUtil.programMessage( PRG_NAME, "Pattern is: " + pattern );
58 final SortedSet<String> set_a = new TreeSet<String>();
59 final SortedSet<String> set_b = new TreeSet<String>();
60 read( species_groups_file, set_a, set_b );
61 print_set( set_a, "Set a:" );
62 print_set( set_b, "Set b:" );
63 final SortedSet<String> matching_names = obtainMatchingNames( phy, pattern );
64 ForesterUtil.programMessage( PRG_NAME, "Found names: " );
65 final SortedMap<String, List<String>> pairs = obtainPairs( matching_names );
// NOTE(review): lca_counter is printed further down but its declaration is
// not visible in this listing.
67 int non_lca_counter = 0;
68 int missing_counter = 0;
69 int total_counter = 0;
70 final Iterator<Entry<String, List<String>>> it = pairs.entrySet().iterator();
71 while ( it.hasNext() ) {
72 final Map.Entry<String, List<String>> x = it.next();
// Exactly two names in the group: treat them as a pair.
74 if ( x.getValue().size() == 2 ) {
75 final String a = x.getValue().get( 0 );
76 final String b = x.getValue().get( 1 );
77 System.out.print( a + " - " + b );
78 final PhylogenyNode lca = PhylogenyMethods.calculateLCA( phy.getNode( a ), phy.getNode( b ) );
79 final List<PhylogenyNode> external_descs = lca.getAllExternalDescendants();
82 for( final PhylogenyNode external_desc : external_descs ) {
// NOTE(review): chained access without null checks; whether getTaxonomy()
// can return null for a node is not visible here -- confirm.
83 final String tc = external_desc.getNodeData().getTaxonomy().getTaxonomyCode();
// NOTE(review): the statements guarded by the next two tests are not visible.
84 if ( set_a.contains( tc ) ) {
87 if ( set_b.contains( tc ) ) {
92 System.out.print( " => LCA " );
// Exactly one name in the group: no second name to pair it with.
100 else if ( x.getValue().size() == 1 ) {
101 System.out.println( x.getValue().get( 0 ) + " => no partner in current tree!" );
// NOTE(review): presumably the branch for any other group size -- the
// enclosing else is not visible.
105 System.out.println( "error" );
109 System.out.println( "Total : " + total_counter );
110 System.out.println( "LCA : " + lca_counter );
111 System.out.println( "Non-LCA : " + non_lca_counter );
112 System.out.println( "With missing: " + missing_counter );
114 catch ( final Exception e ) {
// Only the exception message is forwarded; getMessage() may be null for
// exceptions created without a message.
116 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
120 private static SortedMap<String, List<String>> obtainPairs( final SortedSet<String> matching_names ) {
121 final SortedMap<String, List<String>> pairs = new TreeMap<String, List<String>>();
122 for( final String m : matching_names ) {
123 final String short_m = m.substring( 0, m.indexOf( '~' ) );
124 if ( !pairs.containsKey( short_m ) ) {
125 final List<String> p = new ArrayList<String>();
127 pairs.put( short_m, p );
130 pairs.get( short_m ).add( m );
/**
 * Collects names of external nodes of a phylogeny into a sorted set.
 * <p>
 * The external nodes are visited through iteratorExternalForward(); for each
 * node a Matcher of the pattern against the node name is created, and node
 * names are added to the result set.
 * <p>
 * NOTE(review): in this listing nothing stands between the creation of the
 * Matcher and the add, so the matcher is never consulted. The guard that
 * decides which names are added (presumably m.find() or m.matches()) and the
 * closing braces are not visible -- confirm against the original file.
 *
 * @param phy the phylogeny whose external nodes are inspected
 * @param pattern the pattern applied to each external node name
 * @return sorted set of collected node names
 */
136 private static SortedSet<String> obtainMatchingNames( final Phylogeny phy, final Pattern pattern ) {
137 final SortedSet<String> matching_names = new TreeSet<String>();
138 for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
139 final PhylogenyNode n = it.next();
140 final Matcher m = pattern.matcher( n.getName() );
142 matching_names.add( n.getName() );
145 return matching_names;
148 private static void print_set( final Set<String> set_a, final String l ) {
149 ForesterUtil.programMessage( PRG_NAME, l );
150 for( final String s : set_a ) {
151 System.out.print( s + " " );
153 System.out.println();
/**
 * Reads the species-groups file into the two given sets.
 * <p>
 * The file is read line by line through a reader obtained from
 * ForesterUtil.obtainReader(). Lines that ForesterUtil.isEmpty() reports as
 * empty are skipped; every other line is split on single space characters
 * and its tokens are visited one by one.
 * <p>
 * NOTE(review): the part of the loop that uses each token and the
 * first_line flag is not visible in this listing; presumably tokens of the
 * first non-empty line go to set_a and tokens of later lines to set_b --
 * confirm against the original file. The declaration of line, any throws
 * clause and any closing of the reader are not visible either.
 *
 * @param species_groups_file the file to read
 * @param set_a first set to fill
 * @param set_b second set to fill
 */
156 private static void read( final File species_groups_file, final Set<String> set_a, final Set<String> set_b )
158 final BufferedReader reader = ForesterUtil.obtainReader( species_groups_file );
160 boolean first_line = true;
161 while ( ( line = reader.readLine() ) != null ) {
163 if ( !ForesterUtil.isEmpty( line ) ) {
// Splitting on a single space means consecutive spaces produce empty tokens.
164 final String s[] = line.split( " " );
165 for( final String name : s ) {
/**
 * Prints program information followed by a usage section and one example
 * invocation to standard output.
 * <p>
 * NOTE(review): this listing is incomplete. The arguments passed to
 * printProgramInformation() between PRG_NAME and the library information,
 * the receiver of the bare ".println" call below (presumably System.out) and
 * the end of the method are not visible -- confirm against the original
 * file.
 */
180 private static void printHelp() {
181 ForesterUtil.printProgramInformation( PRG_NAME,
187 ForesterUtil.getForesterLibraryInformation() );
188 System.out.println( "Usage:" );
189 System.out.println();
// NOTE(review): the usage line prints only the program name, although main
// requires three names (pattern, species-groups file, tree file).
190 System.out.println( PRG_NAME + "" );
191 System.out.println();
192 System.out.println( " example: " );
193 System.out.println();
195 .println( "dom_dup \"HUMAN~[12]-2\" groups.txt RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_gsdi.phylo.xml" );
196 System.out.println();
197 System.out.println();