in progress
author cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 30 Sep 2013 04:40:10 +0000 (04:40 +0000)
committer cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 30 Sep 2013 04:40:10 +0000 (04:40 +0000)
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java
forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java
forester/java/src/org/forester/phylogeny/data/Accession.java
forester/java/src/org/forester/phylogeny/data/Sequence.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java

index 9e72a30..bdf5889 100644 (file)
@@ -84,6 +84,7 @@ public final class PhyloXmlMapping {
     public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM       = "from";
     public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO         = "to";
     public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence";
+    public final static String SEQUENCE_X_REFS                                     = "cross_references";
     // public final static String NODE_IDENTIFIER                                     = "node_id";
     public final static String IDENTIFIER                                          = "id";
     public final static String IDENTIFIER_PROVIDER_ATTR                            = "provider";
@@ -130,6 +131,7 @@ public final class PhyloXmlMapping {
     public final static String SEQUENCE_RELATION_DISTANCE                          = "distance";
     public final static String SEQUENCE_SOURCE_ID                                  = "id_source";
     public final static String POLYGON                                             = "polygon";
+  
 
     private PhyloXmlMapping() {
     }
index e585cb3..95d5e9b 100644 (file)
@@ -37,7 +37,7 @@ public final class PhyloXmlUtil {
 
     public static final String       OTHER                                      = "other";
     public static final String       UNKNOWN                                    = "unknown";
-    public final static Pattern      SEQUENCE_SYMBOL_PATTERN                    = Pattern.compile( "\\S{1,20}" );
+    public final static Pattern      SEQUENCE_SYMBOL_PATTERN                    = Pattern.compile( "\\S{1,30}" );
     public final static Pattern      TAXOMONY_CODE_PATTERN                      = Pattern
                                                                                         .compile( ParserUtils.TAX_CODE );
     public final static Pattern      LIT_REF_DOI_PATTERN                        = Pattern
index fa1caad..680cb4d 100644 (file)
@@ -89,6 +89,13 @@ public class SequenceParser implements PhylogenyDataPhyloXmlParser {
             else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.URI ) ) {
                 sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) );
             }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_X_REFS ) ) {
+                for( int j = 0; j < child_element.getNumberOfChildElements(); ++j ) {
+                //    final XmlElement c = child_element.getChildElement( j );
+                    sequence.addCrossReference( ( Accession ) AccessionParser.getInstance().parse( child_element.getChildElement( j ) ) );
+                }
+                //sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) );
+            }
         }
         return sequence;
     }
index 069978b..6e9f158 100644 (file)
@@ -32,20 +32,20 @@ import org.forester.io.parsers.nhx.NHXtags;
 import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
 import org.forester.util.ForesterUtil;
 
-public final class Accession implements PhylogenyData {
+public final class Accession implements PhylogenyData, Comparable<Accession> {
 
     final private String _value;
     final private String _source;
-    final private String _value_source;
+    final private String _source_value;
 
     public Accession( final String value, final String source ) {
         _value = value;
         _source = source;
         if ( source != null ) {
-            _value_source = value + source;
+            _source_value = source + value;
         }
         else {
-            _value_source = value;
+            _source_value = value;
         }
     }
 
@@ -98,10 +98,8 @@ public final class Accession implements PhylogenyData {
 
     @Override
     public int hashCode() {
-        //if ( getSource() != null ) {
-        //    return ( getSource() + getValue() ).hashCode();
-        // }
-        return _value_source.hashCode();
+      
+        return _source_value.hashCode();
     }
 
     @Override
@@ -152,4 +150,12 @@ public final class Accession implements PhylogenyData {
     public String toString() {
         return asText().toString();
     }
+
+    /**
+     * Orders accessions by their concatenated source and value; this is the
+     * ordering used when accessions are kept in a sorted set.
+     * <p>
+     * The concatenation alone is ambiguous: source "ab" with value "c" and
+     * source "a" with value "bc" both yield "abc". Returning 0 for such a
+     * pair would make a TreeSet treat two distinct accessions as duplicates
+     * and silently drop one of them, so ties are broken on the source.
+     *
+     * @param o the accession to compare with; must not be null
+     * @return a negative integer, zero, or a positive integer as this
+     *         accession sorts before, equal to, or after {@code o}
+     */
+    @Override
+    public int compareTo( final Accession o ) {
+        if ( equals( o ) ) {
+            return 0;
+        }
+        final int c = _source_value.compareTo( o._source_value );
+        if ( c != 0 ) {
+            return c;
+        }
+        // Same concatenation but not equal: decide on the source, with a
+        // missing source sorting first.
+        if ( _source == null ) {
+            return ( o._source == null ) ? 0 : -1;
+        }
+        if ( o._source == null ) {
+            return 1;
+        }
+        return _source.compareTo( o._source );
+    }
 }
index b9e6aa3..ac4be9e 100644 (file)
@@ -53,6 +53,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
     private DomainArchitecture     _da;
     private List<Uri>              _uris;
     private List<SequenceRelation> _seq_relations;
+    private SortedSet<Accession>   _xrefs;
 
     public Sequence() {
         init();
@@ -63,12 +64,28 @@ public class Sequence implements PhylogenyData, MultipleUris {
                 && ForesterUtil.isEmpty( getType() ) && ForesterUtil.isEmpty( getLocation() )
                 && ForesterUtil.isEmpty( getSourceId() ) && ForesterUtil.isEmpty( getMolecularSequence() )
                 && ( getDomainArchitecture() == null ) && ForesterUtil.isEmpty( _annotations )
-                && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations );
+                && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations )
+                && ( getCrossReferences() == null || getCrossReferences().isEmpty() );
     }
 
     public void addAnnotation( final Annotation annotation ) {
         getAnnotations().add( annotation );
     }
+    
+    /**
+     * Adds a cross-reference to this sequence, creating the backing sorted
+     * set on first use.
+     *
+     * @param cross_reference the accession to record as a cross-reference
+     */
+    public void addCrossReference( Accession cross_reference ) {
+        SortedSet<Accession> xrefs = getCrossReferences();
+        if ( xrefs == null ) {
+            setCrossReferences( new TreeSet<Accession>() );
+            xrefs = getCrossReferences();
+        }
+        xrefs.add( cross_reference );
+    }
+    
+    /**
+     * Returns the cross-references of this sequence.
+     *
+     * @return the sorted set of cross-references; may be null, since the set
+     *         is only created once a cross-reference has been added
+     */
+    public SortedSet<Accession> getCrossReferences() {
+        return this._xrefs;
+    }
+    
+    /**
+     * Replaces the set holding this sequence's cross-references.
+     *
+     * @param cross_references the new backing set, or null to clear it
+     */
+    private void setCrossReferences( TreeSet<Accession> cross_references ) {
+        this._xrefs = cross_references;
+    }
 
     @Override
     public void addUri( final Uri uri ) {
@@ -149,6 +166,14 @@ public class Sequence implements PhylogenyData, MultipleUris {
         else {
             seq.setDomainArchitecture( null );
         }
+        if ( getCrossReferences() != null ) {
+            seq.setCrossReferences( new TreeSet<Accession>() );
+            for( final Accession x : getCrossReferences() ) {
+                if ( x != null ) {
+                    seq.getCrossReferences().add( x);
+                }
+            }
+        }
         return seq;
     }
 
@@ -279,6 +304,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
         setUris( null );
         setSequenceRelations( null );
         setSourceId( null );
+        setCrossReferences(null);
     }
 
     @Override
@@ -391,14 +417,14 @@ public class Sequence implements PhylogenyData, MultipleUris {
                                              String.valueOf( isMolecularSequenceAligned() ),
                                              indentation );
         }
-        if ( getUris() != null ) {
+        if ( getUris() != null && !getUris().isEmpty() ) {
             for( final Uri uri : getUris() ) {
                 if ( uri != null ) {
                     uri.toPhyloXML( writer, level, indentation );
                 }
             }
         }
-        if ( _annotations != null ) {
+        if ( getAnnotations() != null && !getAnnotations().isEmpty() ) {
             for( final PhylogenyData annotation : getAnnotations() ) {
                 annotation.toPhyloXML( writer, level, my_ind );
             }
@@ -406,6 +432,17 @@ public class Sequence implements PhylogenyData, MultipleUris {
         if ( getDomainArchitecture() != null ) {
             getDomainArchitecture().toPhyloXML( writer, level, my_ind );
         }
+        if ( getCrossReferences() != null && !getCrossReferences().isEmpty() ) {
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
+            for( final PhylogenyData x : getCrossReferences() ) {
+                x.toPhyloXML( writer, level, my_ind );
+            }
+            writer.write( ForesterUtil.LINE_SEPARATOR );
+            writer.write( my_ind );
+            PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
+        } 
         writer.write( ForesterUtil.LINE_SEPARATOR );
         writer.write( indentation );
         PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE );
index 609b7df..02efa90 100644 (file)
@@ -36,6 +36,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Set;
+import java.util.SortedSet;
 
 import org.forester.application.support_transfer;
 import org.forester.archaeopteryx.TreePanelUtil;
@@ -1208,6 +1209,22 @@ public final class Test {
             if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) {
                 return false;
             }
+            SortedSet<Accession> x =  t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
+            if ( x.size() != 4 ) {
+                return false;
+            }
+            int c = 0;
+            for( Accession acc : x ) {
+                if ( c == 0 ) {
+                    if ( !acc.getSource().equals( "KEGG" ) ) {
+                        return false;
+                    }
+                    if ( !acc.getValue().equals( "hsa:596" ) ) {
+                        return false;
+                    }
+                }
+                c++;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -1487,7 +1504,7 @@ public final class Test {
             }
             if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() )
                     .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) {
-                ;
+                
                 return false;
             }
             if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) {
@@ -1536,6 +1553,22 @@ public final class Test {
                     .equalsIgnoreCase( "433" ) ) {
                 return false;
             }
+            SortedSet<Accession> x =  t3_rt.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
+            if ( x.size() != 4 ) {
+                return false;
+            }
+            int c = 0;
+            for( Accession acc : x ) {
+                if ( c == 0 ) {
+                    if ( !acc.getSource().equals( "KEGG" ) ) {
+                        return false;
+                    }
+                    if ( !acc.getValue().equals( "hsa:596" ) ) {
+                        return false;
+                    }
+                }
+                c++;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
index f28a8f1..f370e3d 100644 (file)
@@ -241,8 +241,9 @@ public final class SequenceDbWsTools {
                     seq.setName( db_entry.getSequenceName() );
                 }
                 if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
+                    final String gn = db_entry.getGeneName().replace( ' ', '_' );
                     try {
-                        seq.setSymbol( db_entry.getGeneName() );
+                        seq.setSymbol( gn );
                     }
                     catch ( PhyloXmlDataFormatException e ) {
                         // Eat this exception.