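- * Builds a query string for a given sequence using its PDB entries and
- * DBRef entries, falling back to the sequence name when neither a PDB nor a
- * UniProt reference is found
- *
- * @param seq
- * the sequence to build a query for
- * @return the built query string, or null if no usable query term was found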
- */
-
- static String buildQuery(SequenceI seq)
- {
- boolean isPDBRefsFound = false;
- boolean isUniProtRefsFound = false;
- StringBuilder queryBuilder = new StringBuilder();
- Set<String> seqRefs = new LinkedHashSet<>();
-
- if (seq.getAllPDBEntries() != null
- && queryBuilder.length() < MAX_QLENGTH)
- {
- for (PDBEntry entry : seq.getAllPDBEntries())
- {
- if (isValidSeqName(entry.getId()))
- {
- queryBuilder.append("pdb_id:").append(entry.getId().toLowerCase())
- .append(" OR ");
- isPDBRefsFound = true;
- }
- }
- }
-
- if (seq.getDBRefs() != null && seq.getDBRefs().length != 0)
- {
- for (DBRefEntry dbRef : seq.getDBRefs())
- {
- if (isValidSeqName(getDBRefId(dbRef))
- && queryBuilder.length() < MAX_QLENGTH)
- {
- if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
- {
- queryBuilder.append("uniprot_accession:")
- .append(getDBRefId(dbRef)).append(" OR ");
- queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
- .append(" OR ");
- isUniProtRefsFound = true;
- }
- else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
- {
-
- queryBuilder.append("pdb_id:")
- .append(getDBRefId(dbRef).toLowerCase()).append(" OR ");
- isPDBRefsFound = true;
- }
- else
- {
- seqRefs.add(getDBRefId(dbRef));
- }
- }
- }
- }
-
- if (!isPDBRefsFound && !isUniProtRefsFound)
- {
- String seqName = seq.getName();
- seqName = sanitizeSeqName(seqName);
- String[] names = seqName.toLowerCase().split("\\|");
- for (String name : names)
- {
- // System.out.println("Found name : " + name);
- name.trim();
- if (isValidSeqName(name))
- {
- seqRefs.add(name);
- }
- }
-
- for (String seqRef : seqRefs)
- {
- queryBuilder.append("text:").append(seqRef).append(" OR ");
- }
- }
-
- int endIndex = queryBuilder.lastIndexOf(" OR ");
- if (queryBuilder.toString().length() < 6)
- {
- return null;
- }
- String query = queryBuilder.toString().substring(0, endIndex);
- return query;
- }
-
- /**
- * Remove the following special characters from input string +, -, &, !, (, ),
- * {, }, [, ], ^, ", ~, *, ?, :, \
- *
- * @param seqName
- * @return
- */
- static String sanitizeSeqName(String seqName)
- {
- Objects.requireNonNull(seqName);
- return seqName.replaceAll("\\[\\d*\\]", "")
- .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
- }
-
- /**
- * Ensures sequence ref names are not less than 3 characters and does not
- * contain a database name
- *
- * @param seqName
- * @return
- */
- static boolean isValidSeqName(String seqName)
- {
- // System.out.println("seqName : " + seqName);
- String ignoreList = "pdb,uniprot,swiss-prot";
- if (seqName.length() < 3)
- {
- return false;
- }
- if (seqName.contains(":"))
- {
- return false;
- }
- seqName = seqName.toLowerCase();
- for (String ignoredEntry : ignoreList.split(","))
- {
- if (seqName.contains(ignoredEntry))
- {
- return false;
- }
- }
- return true;
- }
-
- static String getDBRefId(DBRefEntry dbRef)
- {
- String ref = dbRef.getAccessionId().replaceAll("GO:", "");
- return ref;
- }
-
- /**