X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=8daa322e15268ed688b03a32b3ce13d71c2ceaa9;hb=72c535142a5e6b0da9c7edb2f605eb835b43e6fb;hp=974409d614eb201246c6c7a4f29560efaf492831;hpb=07d2b6864846c6c95ec1d4f1b083c248ffbcc591;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 974409d..8daa322 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -58,6 +58,7 @@ import org.forester.evoinference.matrix.character.CharacterStateMatrix; import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format; import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; +import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.go.GoId; import org.forester.go.GoNameSpace; @@ -238,8 +239,10 @@ public final class SurfacingUtil { final DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics(); - final DescriptiveStatistics gained_multiple_times_domain_length_stats = new BasicDescriptiveStatistics(); - final DescriptiveStatistics gained_once_domain_length_stats = new BasicDescriptiveStatistics(); + long gained_multiple_times_domain_length_sum = 0; + long gained_once_domain_length_sum = 0; + long gained_multiple_times_domain_length_count = 0; + long gained_once_domain_length_count = 0; for( final String dc : dcs ) { final int count = dc_gain_counts.get( dc ); if ( histogram.containsKey( count ) ) { @@ -286,15 +289,13 @@ public final class SurfacingUtil { more_than_once.add( dc ); if ( protein_length_stats_by_dc != null ) { final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_multiple_times_lengths_stats.addValue( element ); } } if ( domain_number_stats_by_dc != null ) { final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_multiple_times_domain_count_stats.addValue( element ); } } @@ -302,28 +303,26 @@ public final class SurfacingUtil { final String[] ds = dc.split( "=" ); final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] ); final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] ); - final double[] a0 = s0.getDataAsDoubleArray(); - final double[] a1 = s1.getDataAsDoubleArray(); - for( final double element : a0 ) { - gained_multiple_times_domain_length_stats.addValue( element ); + for( final double element : s0.getData() ) { + gained_multiple_times_domain_length_sum += element; + ++gained_multiple_times_domain_length_count; } - for( final double element : a1 ) { - gained_multiple_times_domain_length_stats.addValue( element ); + for( final double element : s1.getData() ) { + gained_multiple_times_domain_length_sum += element; + ++gained_multiple_times_domain_length_count; } } } else { if ( protein_length_stats_by_dc != null ) { final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_once_lengths_stats.addValue( element ); } } if ( domain_number_stats_by_dc != null ) { final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_once_domain_count_stats.addValue( element ); } } @@ -331,13 +330,13 @@ public final class SurfacingUtil { final String[] ds = dc.split( "=" ); final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] ); final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] ); - final double[] a0 = s0.getDataAsDoubleArray(); - final double[] a1 = s1.getDataAsDoubleArray(); - for( final double element : a0 ) { - gained_once_domain_length_stats.addValue( element ); + for( final double element : s0.getData() ) { + gained_once_domain_length_sum += element; + ++gained_once_domain_length_count; } - for( final double element : a1 ) { - gained_once_domain_length_stats.addValue( element ); + for( final double element : s1.getData() ) { + gained_once_domain_length_sum += element; + ++gained_once_domain_length_count; } } } @@ -375,8 +374,7 @@ public final class SurfacingUtil { } for( int i = 0; i < nodes.size() - 1; ++i ) { for( int j = i + 1; j < nodes.size(); ++j ) { - final PhylogenyNode lca = PhylogenyMethods.getInstance().obtainLCA( nodes.get( i ), - nodes.get( j ) ); + final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ), nodes.get( j ) ); String rank = "unknown"; if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) { @@ -409,7 +407,7 @@ public final class SurfacingUtil { out_for_rank_counts.close(); out_for_ancestor_species_counts.close(); if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats ) - && ( ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) { + && ( ( domain_length_stats_by_domain != null ) || ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) { final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) ); w.write( "Domain Lengths: " ); w.write( "\n" ); @@ -455,12 +453,17 @@ public final class SurfacingUtil { w.write( "\n" ); w.write( "Gained once, domain lengths:" ); w.write( "\n" ); - w.write( gained_once_domain_length_stats.toString() ); + w.write( "N: " + gained_once_domain_length_count ); + w.write( "\n" ); + w.write( "Avg: " + ( ( double ) gained_once_domain_length_sum / gained_once_domain_length_count ) ); w.write( "\n" ); w.write( "\n" ); w.write( "Gained multiple times, domain lengths:" ); w.write( "\n" ); - w.write( gained_multiple_times_domain_length_stats.toString() ); + w.write( "N: " + gained_multiple_times_domain_length_count ); + w.write( "\n" ); + w.write( "Avg: " + + ( ( double ) gained_multiple_times_domain_length_sum / gained_multiple_times_domain_length_count ) ); w.write( "\n" ); w.write( "\n" ); w.write( "\n" ); @@ -639,7 +642,7 @@ public final class SurfacingUtil { public static Phylogeny createNjTreeBasedOnMatrixToFile( final File nj_tree_outfile, final DistanceMatrix distance ) { checkForOutputFileWriteability( nj_tree_outfile ); final NeighborJoining nj = NeighborJoining.createInstance(); - final Phylogeny phylogeny = nj.execute( distance ); + final Phylogeny phylogeny = nj.execute( ( BasicSymmetricalDistanceMatrix ) distance ); phylogeny.setName( nj_tree_outfile.getName() ); writePhylogenyToFile( phylogeny, nj_tree_outfile.toString() ); return phylogeny; @@ -1181,23 +1184,61 @@ public final class SurfacingUtil { final DomainId domain_id, final Writer out, final String separator, - final String limit_to_species ) throws IOException { + final String limit_to_species, + final double domain_e_cutoff ) throws IOException { + System.out.println( "Per domain E-value: " + domain_e_cutoff ); for( final Species species : protein_lists_per_species.keySet() ) { + System.out.println( species + ":" ); for( final Protein protein : protein_lists_per_species.get( species ) ) { if ( ForesterUtil.isEmpty( limit_to_species ) || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { final List domains = protein.getProteinDomains( domain_id ); if ( domains.size() > 0 ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final Domain domain : domains ) { - stats.addValue( domain.getPerSequenceEvalue() ); - } out.write( protein.getSpecies().getSpeciesId() ); out.write( separator ); out.write( protein.getProteinId().getId() ); out.write( separator ); - out.write( "[" + FORMATTER.format( stats.median() ) + "]" ); + out.write( domain_id.toString() ); + out.write( separator ); + int prev_to = -1; + for( final Domain domain : domains ) { + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { + out.write( "/" ); + out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( prev_to >= 0 ) { + final int l = domain.getFrom() - prev_to; + System.out.println( l ); + } + prev_to = domain.getTo(); + } + } + out.write( "/" ); out.write( separator ); + final List domain_list = new ArrayList(); + for( final Domain domain : protein.getProteinDomains() ) { + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { + domain_list.add( domain ); + } + } + final Domain domain_ary[] = new Domain[ domain_list.size() ]; + for( int i = 0; i < domain_list.size(); ++i ) { + domain_ary[ i ] = domain_list.get( i ); + } + Arrays.sort( domain_ary, new DomainComparator( true ) ); + out.write( "{" ); + boolean first = true; + for( final Domain domain : domain_ary ) { + if ( first ) { + first = false; + } + else { + out.write( "," ); + } + out.write( domain.getDomainId().toString() ); + out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); + out.write( ":" + domain.getPerDomainEvalue() ); + } + out.write( "}" ); if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() .equals( SurfacingConstants.NONE ) ) ) { out.write( protein.getDescription() ); @@ -2588,4 +2629,24 @@ public final class SurfacingUtil { e.printStackTrace(); } } + + final static class DomainComparator implements Comparator { + + final private boolean _ascending; + + public DomainComparator( final boolean ascending ) { + _ascending = ascending; + } + + @Override + public final int compare( final Domain d0, final Domain d1 ) { + if ( d0.getFrom() < d1.getFrom() ) { + return _ascending ? -1 : 1; + } + else if ( d0.getFrom() > d1.getFrom() ) { + return _ascending ? 1 : -1; + } + return 0; + } + } }