2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 import java.io.BufferedReader;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.io.InputStream;
27 import java.util.HashMap;
31 * A class to check help file cross-references, and external URLs if internet
37 public class HelpLinksChecker implements BufferedLineReader.LineCleaner
39 private static final String HELP_HS = "help.hs";
41 private static final String HELP_TOC_XML = "helpTOC.xml";
43 private static final String HELP_JHM = "help.jhm";
45 private static boolean internetAvailable = true;
47 private int targetCount = 0;
49 private int mapCount = 0;
51 private int internalHrefCount = 0;
53 private int anchorRefCount = 0;
55 private int invalidAnchorRefCount = 0;
57 private int externalHrefCount = 0;
59 private int invalidMapUrlCount = 0;
61 private int invalidTargetCount = 0;
63 private int invalidImageCount = 0;
65 private int invalidInternalHrefCount = 0;
67 private int invalidExternalHrefCount = 0;
70 * The only parameter should be a path to the root of the help directory in
74 * [0] path to the /html folder in the workspace
76 * [1] (optional) -nointernet to suppress external link checking for
77 * a fast check of internal links only
80 public static void main(String[] args) throws IOException
82 if (args.length == 0 || args.length > 2
83 || (args.length == 2 && !args[1].equals("-nointernet")))
85 log("Usage: <pathToHelpFolder> [-nointernet]");
91 internetAvailable = false;
94 new HelpLinksChecker().checkLinks(args[0]);
98 * Checks help links and reports results
100 * @param helpDirectoryPath
101 * @throws IOException
103 void checkLinks(String helpDirectoryPath) throws IOException
105 log("Checking help file links");
106 File helpFolder = new File(helpDirectoryPath).getCanonicalFile();
107 if (!helpFolder.exists())
109 log("Can't find " + helpDirectoryPath);
113 internetAvailable &= connectToUrl("http://www.example.org");
115 Map<String, String> tocTargets = checkHelpMappings(helpFolder);
117 Map<String, String> unusedTargets = new HashMap<String, String>(
120 checkTableOfContents(helpFolder, tocTargets, unusedTargets);
122 checkHelpSet(helpFolder, tocTargets, unusedTargets);
124 checkHtmlFolder(new File(helpFolder, "html"));
126 reportResults(unusedTargets);
130 * Checks all html files in the given directory or its sub-directories
133 * @throws IOException
135 private void checkHtmlFolder(File folder) throws IOException
137 File[] files = folder.listFiles();
146 if (f.getAbsolutePath().endsWith(".html"))
148 checkHtmlFile(f, folder);
155 * Checks that any image attribute in help.hs is a valid target
159 * @param unusedTargets
160 * used targets are removed from here
162 private void checkHelpSet(File helpFolder,
163 Map<String, String> tocTargets, Map<String, String> unusedTargets)
166 BufferedReader br = new BufferedReader(new FileReader(new File(
167 helpFolder, HELP_HS)));
168 String data = br.readLine();
174 String image = getAttribute(data, "image");
177 unusedTargets.remove(image);
178 if (!tocTargets.containsKey(image))
180 log(String.format("Invalid image '%s' at line %d of %s", image,
185 data = br.readLine();
191 * Print counts to sysout
193 * @param unusedTargets
195 private void reportResults(Map<String, String> unusedTargets)
198 log(targetCount + " distinct help targets");
199 log(mapCount + " help mappings");
200 log(invalidTargetCount + " invalid targets");
201 log(unusedTargets.size() + " unused targets");
202 for (String target : unusedTargets.keySet())
204 log(String.format(" %s: %s", target, unusedTargets.get(target)));
206 log(invalidMapUrlCount + " invalid map urls");
207 log(invalidImageCount + " invalid image attributes");
208 log(String.format("%d internal href links (%d with anchors)",
209 internalHrefCount, anchorRefCount));
210 log(invalidInternalHrefCount + " invalid internal href links");
211 log(invalidAnchorRefCount + " invalid internal anchor links");
212 log(externalHrefCount + " external href links");
213 if (internetAvailable)
215 log(invalidExternalHrefCount + " invalid external href links");
220 .println("External links not verified as internet not available");
222 if (invalidInternalHrefCount > 0 || invalidExternalHrefCount > 0
223 || invalidImageCount > 0 || invalidAnchorRefCount > 0)
225 log("*** Failed ***");
228 log("*** Success ***");
234 static void log(String s)
236 System.out.println(s);
240 * Reads the given html file and checks any href attibute values are either
242 * <li>a valid relative file path, or</li>
243 * <li>a valid absolute URL (if external link checking is enabled)</li>
248 * the parent folder (for validation of relative paths)
250 private void checkHtmlFile(File htmlFile, File htmlFolder)
253 BufferedReader br = new BufferedReader(new FileReader(htmlFile));
254 String data = br.readLine();
259 String href = getAttribute(data, "href");
262 String anchor = null;
263 int anchorPos = href.indexOf("#");
266 anchor = href.substring(anchorPos + 1);
267 href = href.substring(0, anchorPos);
269 boolean badLink = false;
270 if (href.startsWith("http"))
273 if (internetAvailable)
275 if (!connectToUrl(href))
278 invalidExternalHrefCount++;
285 File hrefFile = href.equals("") ? htmlFile : new File(htmlFolder,
287 if (hrefFile != htmlFile && !fileExists(hrefFile, href))
290 invalidInternalHrefCount++;
297 if (!checkAnchorExists(hrefFile, anchor))
299 log(String.format("Invalid anchor: %s at line %d of %s",
300 anchor, lineNo, getPath(htmlFile)));
301 invalidAnchorRefCount++;
308 log(String.format("Invalid href %s at line %d of %s", href,
309 lineNo, getPath(htmlFile)));
312 data = br.readLine();
318 * Performs a case-sensitive check that the href'd file exists
322 * @throws IOException
324 boolean fileExists(File hrefFile, String href) throws IOException
326 if (!hrefFile.exists())
332 * On Mac or Windows, file.exists() is not case sensitive, so do an
333 * additional check with case sensitivity
335 int slashPos = href.lastIndexOf(File.separator);
336 String expectedFileName = slashPos == -1 ? href : href
337 .substring(slashPos + 1);
338 String cp = hrefFile.getCanonicalPath();
339 slashPos = cp.lastIndexOf(File.separator);
340 String actualFileName = slashPos == -1 ? cp : cp
341 .substring(slashPos + 1);
343 return expectedFileName.equals(actualFileName);
347 * Reads the file and checks for the presence of the given html anchor
351 * @return true if anchor is found else false
353 private boolean checkAnchorExists(File hrefFile, String anchor)
355 String nameAnchor = "<a name=\"" + anchor + "\"";
356 String idAnchor = "<a id=\"" + anchor + "\"";
357 boolean found = false;
360 BufferedReader br = new BufferedReader(new FileReader(hrefFile));
361 BufferedLineReader blr = new BufferedLineReader(br, 3, this);
362 String data = blr.read();
365 if (data.contains(nameAnchor) || data.contains(idAnchor))
373 } catch (IOException e)
381 * Returns the part of the file path starting from /help/
386 private String getPath(File helpFile)
388 String path = helpFile.getPath();
389 int helpPos = path.indexOf("/help/");
390 return helpPos == -1 ? path : path.substring(helpPos);
394 * Returns true if the URL returns an input stream, or false if the URL
395 * returns an error code or we cannot connect to it (e.g. no internet
401 private boolean connectToUrl(String url)
405 URL u = new URL(url);
406 InputStream connection = u.openStream();
409 } catch (Throwable t)
416 * Reads file help.jhm and checks that
418 * <li>each target attribute is in tocTargets</li>
419 * <li>each url attribute is a valid relative file link</li>
424 private Map<String, String> checkHelpMappings(File helpFolder)
427 Map<String, String> targets = new HashMap<String, String>();
428 BufferedReader br = new BufferedReader(new FileReader(new File(
429 helpFolder, HELP_JHM)));
430 String data = br.readLine();
437 * record target, check for duplicates
439 String target = getAttribute(data, "target");
443 if (targets.containsKey(target))
446 "Duplicate target mapping to %s at line %d of %s",
447 target, lineNo, HELP_JHM));
458 String url = getAttribute(data, "url");
461 targets.put(target, url);
462 int anchorPos = url.indexOf("#");
465 url = url.substring(0, anchorPos);
467 if (!new File(helpFolder, url).exists())
469 log(String.format("Invalid url path '%s' at line %d of %s", url,
471 invalidMapUrlCount++;
474 data = br.readLine();
481 * Reads file helpTOC.xml and reports any invalid targets
485 * @param unusedTargets
486 * used targets are removed from this map
489 * @throws IOException
491 private void checkTableOfContents(File helpFolder,
492 Map<String, String> tocTargets, Map<String, String> unusedTargets)
495 BufferedReader br = new BufferedReader(new FileReader(new File(
496 helpFolder, HELP_TOC_XML)));
497 String data = br.readLine();
503 * assuming no more than one "target" per line of file here
505 String target = getAttribute(data, "target");
508 unusedTargets.remove(target);
509 if (!tocTargets.containsKey(target))
511 log(String.format("Invalid target '%s' at line %d of %s", target,
512 lineNo, HELP_TOC_XML));
513 invalidTargetCount++;
516 data = br.readLine();
522 * Returns the value of an attribute if found in the data, else null
528 private static String getAttribute(String data, String attName)
531 * make a partial attempt at ignoring within <!-- html comments -->
532 * (doesn't work if multi-line)
534 int commentStartPos = data.indexOf("<!--");
535 int commentEndPos = commentStartPos == -1 ? -1 : data.substring(
536 commentStartPos + 4).indexOf("-->");
538 String match = attName + "=\"";
539 int attPos = data.indexOf(match);
541 && (commentStartPos == -1 || attPos < commentStartPos || attPos > commentEndPos))
543 data = data.substring(attPos + match.length());
544 value = data.substring(0, data.indexOf("\""));
550 * Trim whitespace from concatenated lines but preserve one space for valid
554 public String cleanLine(String l)
556 return l.trim() + " ";