X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2Futil%2FParserUtils.java;h=77af4c83ce6778143e27ae513256fd8d5db917be;hb=e9bf8327e1c9a7795ac7269e04d98c0570648fe4;hp=a465d4a9dddaca28e16d7db3da57bf12ba527a45;hpb=473f1d74784af1ff6e87f45ab7219a642b948785;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index a465d4a..77af4c8 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -61,12 +61,18 @@ public final class ParserUtils { final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(" + TAX_CODE + ")\\b" ); final public static Pattern TAXOMONY_SN_PATTERN = Pattern - .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" ); + .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,30}(?:_[a-z][a-z0-9_]+)?)\\b" ); + final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern + .compile( "\\b([A-Z][a-z]+[_ ][a-z]{2,30}(?:[_ ][a-z]{2,30})?)(?:\\b|_)" ); + final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern + .compile( "\\b([A-Z][a-z]+[_ ][a-z]{2,30}[_ ](?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)" ); + final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_2 = Pattern + .compile( "\\b([A-Z][a-z]+[_ ][a-z]{2,30}[_ ]\\((?:str|subsp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))(?:\\b|_)?" ); + final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" ); final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_(" + TAX_CODE + ")/\\d+-\\d+\\b" ); - final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A = Pattern.compile( "(?:\\b|_)(\\d{1,7})\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern - .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(\\d{1,7})\\b" ); + .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" ); @@ -199,6 +205,18 @@ public final class ParserUtils { if ( m.find() ) { return m.group( 1 ).replace( '_', ' ' ); } + final Matcher m_str1 = TAXOMONY_SN_PATTERN_STRAIN_1.matcher( name ); + if ( m_str1.find() ) { + return m_str1.group( 1 ).replace( '_', ' ' ); + } + final Matcher m_str2 = TAXOMONY_SN_PATTERN_STRAIN_2.matcher( name ); + if ( m_str2.find() ) { + return m_str2.group( 1 ).replace( '_', ' ' ); + } + final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name ); + if ( m_sn.find() ) { + return m_sn.group( 1 ).replace( '_', ' ' ); + } return null; } @@ -273,12 +291,12 @@ public final class ParserUtils { if ( m.find() ) { return m.group( 1 ); } - else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { - m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name ); - if ( m.find() ) { - return m.group( 1 ); - } - } + //else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { + // m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name ); + // if ( m.find() ) { + // return m.group( 1 ); + // } + //} } return null; }