From 1483115eb1fc01b30eb8b7465acd9819e90622d5 Mon Sep 17 00:00:00 2001
From: Ben Soares
Date: Fri, 17 May 2024 15:55:52 +0100
Subject: [PATCH] JAL-4420 Allow preprocessing of arguments if they are all
 simply filenames (or URLs). Preprocessing involves adding --open arguments,
 and spotting and regrouping associated files such as annotations, features
 and tree files. These get --annotations argument etc. Add FileUtils tools to
 get basepath and extension of filename or URL whilst only in String form.

---
 src/jalview/bin/argparser/ArgParser.java |    8 ++
 src/jalview/util/ArgParserUtils.java     |  214 ++++++++++++++++++++++++++++++
 src/jalview/util/FileUtils.java          |  103 ++++++++++++++
 3 files changed, 325 insertions(+)
 create mode 100644 src/jalview/util/ArgParserUtils.java

diff --git a/src/jalview/bin/argparser/ArgParser.java b/src/jalview/bin/argparser/ArgParser.java
index 12e1b1d..bbcf831 100644
--- a/src/jalview/bin/argparser/ArgParser.java
+++ b/src/jalview/bin/argparser/ArgParser.java
@@ -33,11 +33,13 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import jalview.bin.Cache;
 import jalview.bin.Console;
 import jalview.bin.Jalview;
 import jalview.bin.Jalview.ExitCode;
 import jalview.bin.argparser.Arg.Opt;
 import jalview.bin.argparser.Arg.Type;
+import jalview.util.ArgParserUtils;
 import jalview.util.FileUtils;
 import jalview.util.HttpUtils;
 
@@ -317,6 +319,12 @@ public class ArgParser
       return;
     }
 
+    // preprocess for associated files only if no actual --args supplied
+    if (!dd && !Cache.getDefault("NOARGPREPROCESSING", false))
+    {
+      ArgParserUtils.preProcessArgs(args);
+    }
+
     if (bsa != null)
     {
       this.bootstrapArgs = bsa;
diff --git a/src/jalview/util/ArgParserUtils.java b/src/jalview/util/ArgParserUtils.java
new file mode 100644
index 0000000..be0f428
--- /dev/null
+++ b/src/jalview/util/ArgParserUtils.java
@@ -0,0 +1,214 @@
+package jalview.util;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import jalview.bin.Cache;
+import jalview.bin.argparser.Arg;
+import jalview.io.FileFormatI;
+import jalview.io.FileFormats;
+
+/**
+ * Preprocessing of command line arguments that are all plain filenames or
+ * URLs, so that associated annotations, features and tree files are grouped
+ * with the alignment file they belong to.
+ */
+public class ArgParserUtils
+{
+  // the extension sets are filled on first use by setValidExtensions()
+  private static Set<String> alignmentExtensions = null;
+
+  private static Set<String> annotationsExtensions = null;
+
+  private static Set<String> featuresExtensions = null;
+
+  private static Set<String> treeExtensions = null;
+
+  /**
+   * Rewrites a list of filenames or URLs in place. Each entry with a known
+   * alignment file extension gets an OPEN argument inserted before it. Any
+   * other entry that has the same base as an alignment file plus a known
+   * annotations, features or tree extension is moved to directly follow that
+   * alignment file, preceded by the matching argument.
+   *
+   * @param filenames
+   *          modifiable list of filenames or URLs, with no '-arg' or '--arg'
+   *          entries
+   */
+  public static void preProcessArgs(List<String> filenames)
+  {
+    // Running through the arguments to look for '-arg' or '--arg' should
+    // already have happened, not doing it again.
+    if (alignmentExtensions == null)
+    {
+      setValidExtensions();
+    }
+
+    Set<String> filesSet = new HashSet<>(filenames);
+
+    List<Arg> argSet = new ArrayList<>();
+    argSet.add(Arg.ANNOTATIONS);
+    argSet.add(Arg.FEATURES);
+    argSet.add(Arg.TREE);
+
+    Map<Arg, Set<String>> argExtensionsMap = new HashMap<>();
+    argExtensionsMap.put(Arg.ANNOTATIONS, annotationsExtensions);
+    argExtensionsMap.put(Arg.FEATURES, featuresExtensions);
+    argExtensionsMap.put(Arg.TREE, treeExtensions);
+
+    Map<String, BaseInfo> baseInfoMap = new HashMap<>();
+
+    // we make a copy to run through, and delete associated filenames from the
+    // original
+    final List<String> filenamesCopy = new ArrayList<>(filenames);
+    for (String filename : filenamesCopy)
+    {
+      // skip nulls, and files already claimed as another file's associate
+      if (filename == null || !filenames.contains(filename))
+      {
+        continue;
+      }
+      String ext = FileUtils.getExtension(filename);
+      // alignmentExtensions holds lower case values only
+      if (ext == null || ext.length() == 0 || !alignmentExtensions
+              .contains(ext.toLowerCase(Locale.ROOT)))
+      {
+        continue;
+      }
+      BaseInfo bi = new BaseInfo(filename);
+
+      // this includes the dot at the end of the basename
+      String base = FileUtils.getBase(filename);
+
+      for (Arg arg : argSet)
+      {
+        for (String possibleExt : argExtensionsMap.get(arg))
+        {
+          String possibleFile = base + possibleExt;
+          // never claim the file itself, or a file already being opened as an
+          // alignment (possible when the extension lists overlap): it would
+          // be removed from the list and have no position to be opened at
+          if (filesSet.contains(possibleFile)
+                  && !possibleFile.equals(filename)
+                  && !baseInfoMap.containsKey(possibleFile))
+          {
+            bi.putAssociatedFile(arg, possibleFile);
+            filenames.remove(possibleFile);
+            break;
+          }
+        }
+      }
+
+      baseInfoMap.put(filename, bi);
+    }
+
+    // now we go through the saved associated files and add them back in to the
+    // right places with the appropriate argument
+    for (Map.Entry<String, BaseInfo> entry : baseInfoMap.entrySet())
+    {
+      BaseInfo bi = entry.getValue();
+
+      int pos = filenames.indexOf(entry.getKey());
+      if (pos < 0)
+      {
+        // shouldn't happen, but List.add(-1, ...) would throw
+        continue;
+      }
+      if (bi.associatedFiles != null)
+      {
+        for (Map.Entry<Arg, String> assoc : bi.associatedFiles.entrySet())
+        {
+          String associatedFile = assoc.getValue();
+          if (associatedFile == null)
+          {
+            // shouldn't happen!
+            continue;
+          }
+          filenames.add(pos + 1, assoc.getKey().argString());
+          filenames.add(pos + 2, associatedFile);
+        }
+      }
+      // add an --open arg to separate from other files
+      filenames.add(pos, Arg.OPEN.argString());
+    }
+  }
+
+  /**
+   * Fills the alignment, annotations, features and tree extension sets.
+   * Alignment extensions are taken from the readable file formats; the others
+   * are read with Cache.getDefault, which is given built-in default lists.
+   */
+  private static void setValidExtensions()
+  {
+    alignmentExtensions = new HashSet<>();
+    FileFormats ffs = FileFormats.getInstance();
+    List<String> validFormats = ffs.getReadableFormats();
+
+    for (String fname : validFormats)
+    {
+      FileFormatI tff = ffs.forName(fname);
+      // stored in lower case so extensions can be matched case-insensitively
+      addExtensions(alignmentExtensions,
+              tff.getExtensions().toLowerCase(Locale.ROOT));
+    }
+
+    annotationsExtensions = new HashSet<>();
+    addExtensions(annotationsExtensions,
+            Cache.getDefault("ARGPREPROCESSORANNOTATIONSEXTENSIONS",
+                    "annotation,annotations"));
+
+    featuresExtensions = new HashSet<>();
+    addExtensions(featuresExtensions, Cache.getDefault(
+            "ARGPREPROCESSORFEATURESEXTENSIONS", "feature,features"));
+
+    treeExtensions = new HashSet<>();
+    addExtensions(treeExtensions, Cache.getDefault(
+            "ARGPREPROCESSORTREEEXTENSIONS", "tree,tre,newick,nwk"));
+  }
+
+  /**
+   * Adds each entry of a comma separated list to a set, ignoring whitespace
+   * around entries and skipping empty entries.
+   */
+  private static void addExtensions(Set<String> extensions, String csv)
+  {
+    for (String ext : csv.split(","))
+    {
+      String trimmed = ext.trim();
+      if (trimmed.length() > 0)
+      {
+        extensions.add(trimmed);
+      }
+    }
+  }
+}
+
+/**
+ * The files associated with one alignment file, keyed by the argument they
+ * should be given with.
+ */
+class BaseInfo
+{
+  String filename;
+
+  Map<Arg, String> associatedFiles = null;
+
+  BaseInfo(String filename)
+  {
+    this.filename = filename;
+  }
+
+  void putAssociatedFile(Arg a, String file)
+  {
+    if (associatedFiles == null)
+    {
+      associatedFiles = new HashMap<>();
+    }
+    associatedFiles.put(a, file);
+  }
+}
diff --git a/src/jalview/util/FileUtils.java b/src/jalview/util/FileUtils.java
index e7d274c..f2883a7 100644
--- a/src/jalview/util/FileUtils.java
+++ b/src/jalview/util/FileUtils.java
@@ -22,6 +22,8 @@ package jalview.util;
 
 import java.io.File;
 import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.nio.file.FileSystems;
 import java.nio.file.FileVisitOption;
 import java.nio.file.FileVisitResult;
@@ -291,4 +293,105 @@ public class FileUtils
 
     return parentDir.mkdirs();
   }
+
+  /**
+   * Get a guessed file extension from a filename, path or URL given as a
+   * String.
+   *
+   * @param filename
+   *          filename, path or http(s) URL
+   * @return the text after the extension dot, or null if filename is null or no
+   *         extension is found
+   */
+  public static String getExtension(String filename)
+  {
+    return getBaseOrExtension(filename, true);
+  }
+
+  /**
+   * getBase returns everything in a path/URI up to (and including) an extension
+   * dot. Note this is not the same as getBasename() since getBasename() only
+   * gives the filename base, not the path too. If no extension dot is found
+   * (i.e. a dot in character position 2 or more of the filename, after the last
+   * slash) then the whole path is considered the base.
+   *
+   * @param filename
+   *          filename, path or http(s) URL
+   * @return String base, or null if filename is null
+   */
+  public static String getBase(String filename)
+  {
+    return getBaseOrExtension(filename, false);
+  }
+
+  /**
+   * Returns either the extension or the base of a filename, path or http(s)
+   * URL. For a URL only the path part is examined, so dots in the host name,
+   * query string or fragment are never taken as the extension dot.
+   *
+   * @param filename0
+   *          filename, path or URL
+   * @param extension
+   *          true to return the extension, false to return the base
+   * @return null if filename0 is null; otherwise the extension (null if there
+   *         is none) or the base (all of filename0 if there is no extension)
+   */
+  public static String getBaseOrExtension(String filename0,
+          boolean extension)
+  {
+    if (filename0 == null)
+    {
+      return null;
+    }
+    String filename = filename0;
+    boolean isUrl = false;
+    if (HttpUtils.startsWithHttpOrHttps(filename))
+    {
+      try
+      {
+        filename = new URL(filename).getPath();
+        isUrl = true;
+      } catch (MalformedURLException e)
+      {
+        // continue to treat as a filename
+      }
+    }
+    int dot = filename.lastIndexOf('.');
+    int slash = filename.lastIndexOf('/');
+    if (!isUrl)
+    {
+      // a local path may use the platform separator instead of (or as well
+      // as) a forward slash
+      slash = Math.max(slash, filename.lastIndexOf(File.separatorChar));
+    }
+    // only the dot of the filename (not dots in path) and not if it's a .hidden
+    // file
+    boolean hasExtension = dot > slash + 1;
+    if (extension)
+    {
+      return hasExtension ? filename.substring(dot + 1) : null;
+    }
+    if (!hasExtension)
+    {
+      return filename0;
+    }
+    // locate the extension dot in the original string: for a URL it is the
+    // last dot before any query ('?') or fragment ('#'), which may themselves
+    // contain dots
+    int end = filename0.length();
+    if (isUrl)
+    {
+      int query = filename0.indexOf('?');
+      if (query >= 0)
+      {
+        end = query;
+      }
+      int fragment = filename0.indexOf('#');
+      if (fragment >= 0 && fragment < end)
+      {
+        end = fragment;
+      }
+    }
+    return filename0.substring(0, filename0.lastIndexOf('.', end - 1) + 1);
+  }
 }
-- 
1.7.10.2