2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Map;
/**
 * A class to check help file cross-references, and external URLs if internet
 * access is available.
 */
37 public class HelpLinksChecker implements BufferedLineReader.LineCleaner
39 private static final String HELP_HS = "help.hs";
41 private static final String HELP_TOC_XML = "helpTOC.xml";
43 private static final String HELP_JHM = "help.jhm";
45 private static boolean internetAvailable = true;
47 private int targetCount = 0;
49 private int mapCount = 0;
51 private int internalHrefCount = 0;
53 private int anchorRefCount = 0;
55 private int invalidAnchorRefCount = 0;
57 private int externalHrefCount = 0;
59 private int invalidMapUrlCount = 0;
61 private int invalidTargetCount = 0;
63 private int invalidImageCount = 0;
65 private int invalidInternalHrefCount = 0;
67 private int invalidExternalHrefCount = 0;
70 * The only parameter should be a path to the root of the help directory in
74 * [0] path to the /html folder in the workspace
76 * [1] (optional) -nointernet to suppress external link checking for
77 * a fast check of internal links only
80 public static void main(String[] args) throws IOException
82 if (args.length == 0 || args.length > 2
83 || (args.length == 2 && !args[1].equals("-nointernet")))
85 log("Usage: <pathToHelpFolder> [-nointernet]");
91 internetAvailable = false;
94 new HelpLinksChecker().checkLinks(args[0]);
98 * Checks help links and reports results
100 * @param helpDirectoryPath
101 * @throws IOException
103 void checkLinks(String helpDirectoryPath) throws IOException
105 log("Checking help file links");
106 File helpFolder = new File(helpDirectoryPath).getCanonicalFile();
107 if (!helpFolder.exists())
109 log("Can't find " + helpDirectoryPath);
113 internetAvailable &= connectToUrl("http://www.example.org");
115 Map<String, String> tocTargets = checkHelpMappings(helpFolder);
117 Map<String, String> unusedTargets = new HashMap<String, String>(
120 checkTableOfContents(helpFolder, tocTargets, unusedTargets);
122 checkHelpSet(helpFolder, tocTargets, unusedTargets);
124 checkHtmlFolder(new File(helpFolder, "html"));
126 reportResults(unusedTargets);
130 * Checks all html files in the given directory or its sub-directories
133 * @throws IOException
135 private void checkHtmlFolder(File folder) throws IOException
137 File[] files = folder.listFiles();
146 if (f.getAbsolutePath().endsWith(".html"))
148 checkHtmlFile(f, folder);
155 * Checks that any image attribute in help.hs is a valid target
159 * @param unusedTargets
160 * used targets are removed from here
162 private void checkHelpSet(File helpFolder,
163 Map<String, String> tocTargets, Map<String, String> unusedTargets)
166 BufferedReader br = new BufferedReader(new FileReader(new File(
167 helpFolder, HELP_HS)));
168 String data = br.readLine();
174 String image = getAttribute(data, "image");
177 unusedTargets.remove(image);
178 if (!tocTargets.containsKey(image))
180 log(String.format("Invalid image '%s' at line %d of %s", image,
185 data = br.readLine();
191 * Print counts to sysout
193 * @param unusedTargets
195 private void reportResults(Map<String, String> unusedTargets)
198 log(targetCount + " distinct help targets");
199 log(mapCount + " help mappings");
200 log(invalidTargetCount + " invalid targets");
201 log(unusedTargets.size() + " unused targets");
202 for (String target : unusedTargets.keySet())
204 log(String.format(" %s: %s", target, unusedTargets.get(target)));
206 log(invalidMapUrlCount + " invalid map urls");
207 log(invalidImageCount + " invalid image attributes");
208 log(String.format("%d internal href links (%d with anchors)",
209 internalHrefCount, anchorRefCount));
210 log(invalidInternalHrefCount + " invalid internal href links");
211 log(invalidAnchorRefCount + " invalid internal anchor links");
212 log(externalHrefCount + " external href links");
213 if (internetAvailable)
215 log(invalidExternalHrefCount + " invalid external href links");
220 .println("External links not verified as internet not available");
222 if (invalidInternalHrefCount > 0 || invalidExternalHrefCount > 0
223 || invalidImageCount > 0 || invalidAnchorRefCount > 0)
225 log("*** Failed ***");
228 log("*** Success ***");
234 static void log(String s)
236 System.out.println(s);
240 * Reads the given html file and checks any href attibute values are either
242 * <li>a valid relative file path, or</li>
243 * <li>a valid absolute URL (if external link checking is enabled)</li>
248 * the parent folder (for validation of relative paths)
250 private void checkHtmlFile(File htmlFile, File htmlFolder)
253 BufferedReader br = new BufferedReader(new FileReader(htmlFile));
254 String data = br.readLine();
259 String href = getAttribute(data, "href");
262 String anchor = null;
263 int anchorPos = href.indexOf("#");
266 anchor = href.substring(anchorPos + 1);
267 href = href.substring(0, anchorPos);
269 boolean badLink = false;
270 if (href.startsWith("http"))
273 if (internetAvailable)
275 if (!connectToUrl(href))
278 invalidExternalHrefCount++;
285 String relFile = System.getProperty("os.name").indexOf("Win") > -1 ? href.replace("/", File.separator) : href;
286 File hrefFile = href.equals("") ? htmlFile : new File(htmlFolder,
288 if (hrefFile != htmlFile && !fileExists(hrefFile, relFile))
291 invalidInternalHrefCount++;
298 if (!checkAnchorExists(hrefFile, anchor))
300 log(String.format("Invalid anchor: %s at line %d of %s",
301 anchor, lineNo, getPath(htmlFile)));
302 invalidAnchorRefCount++;
309 log(String.format("Invalid href %s at line %d of %s", href,
310 lineNo, getPath(htmlFile)));
313 data = br.readLine();
319 * Performs a case-sensitive check that the href'd file exists
323 * @throws IOException
325 boolean fileExists(File hrefFile, String href) throws IOException
327 if (!hrefFile.exists())
333 * On Mac or Windows, file.exists() is not case sensitive, so do an
334 * additional check with case sensitivity
336 int slashPos = href.lastIndexOf(File.separator);
337 String expectedFileName = slashPos == -1 ? href : href
338 .substring(slashPos + 1);
339 String cp = hrefFile.getCanonicalPath();
340 slashPos = cp.lastIndexOf(File.separator);
341 String actualFileName = slashPos == -1 ? cp : cp
342 .substring(slashPos + 1);
344 return expectedFileName.equals(actualFileName);
348 * Reads the file and checks for the presence of the given html anchor
352 * @return true if anchor is found else false
354 private boolean checkAnchorExists(File hrefFile, String anchor)
356 String nameAnchor = "<a name=\"" + anchor + "\"";
357 String idAnchor = "<a id=\"" + anchor + "\"";
358 boolean found = false;
361 BufferedReader br = new BufferedReader(new FileReader(hrefFile));
362 BufferedLineReader blr = new BufferedLineReader(br, 3, this);
363 String data = blr.read();
366 if (data.contains(nameAnchor) || data.contains(idAnchor))
374 } catch (IOException e)
382 * Returns the part of the file path starting from /help/
387 private String getPath(File helpFile)
389 String path = helpFile.getPath();
390 int helpPos = path.indexOf("/help/");
391 return helpPos == -1 ? path : path.substring(helpPos);
395 * Returns true if the URL returns an input stream, or false if the URL
396 * returns an error code or we cannot connect to it (e.g. no internet
402 private boolean connectToUrl(String url)
406 URL u = new URL(url);
407 InputStream connection = u.openStream();
410 } catch (Throwable t)
417 * Reads file help.jhm and checks that
419 * <li>each target attribute is in tocTargets</li>
420 * <li>each url attribute is a valid relative file link</li>
425 private Map<String, String> checkHelpMappings(File helpFolder)
428 Map<String, String> targets = new HashMap<String, String>();
429 BufferedReader br = new BufferedReader(new FileReader(new File(
430 helpFolder, HELP_JHM)));
431 String data = br.readLine();
438 * record target, check for duplicates
440 String target = getAttribute(data, "target");
444 if (targets.containsKey(target))
447 "Duplicate target mapping to %s at line %d of %s",
448 target, lineNo, HELP_JHM));
459 String url = getAttribute(data, "url");
462 targets.put(target, url);
463 int anchorPos = url.indexOf("#");
466 url = url.substring(0, anchorPos);
468 if (!new File(helpFolder, url).exists())
470 log(String.format("Invalid url path '%s' at line %d of %s", url,
472 invalidMapUrlCount++;
475 data = br.readLine();
482 * Reads file helpTOC.xml and reports any invalid targets
486 * @param unusedTargets
487 * used targets are removed from this map
490 * @throws IOException
492 private void checkTableOfContents(File helpFolder,
493 Map<String, String> tocTargets, Map<String, String> unusedTargets)
496 BufferedReader br = new BufferedReader(new FileReader(new File(
497 helpFolder, HELP_TOC_XML)));
498 String data = br.readLine();
504 * assuming no more than one "target" per line of file here
506 String target = getAttribute(data, "target");
509 unusedTargets.remove(target);
510 if (!tocTargets.containsKey(target))
512 log(String.format("Invalid target '%s' at line %d of %s", target,
513 lineNo, HELP_TOC_XML));
514 invalidTargetCount++;
517 data = br.readLine();
523 * Returns the value of an attribute if found in the data, else null
529 private static String getAttribute(String data, String attName)
532 * make a partial attempt at ignoring within <!-- html comments -->
533 * (doesn't work if multi-line)
535 int commentStartPos = data.indexOf("<!--");
536 int commentEndPos = commentStartPos == -1 ? -1 : data.substring(
537 commentStartPos + 4).indexOf("-->");
539 String match = attName + "=\"";
540 int attPos = data.indexOf(match);
542 && (commentStartPos == -1 || attPos < commentStartPos || attPos > commentEndPos))
544 data = data.substring(attPos + match.length());
545 value = data.substring(0, data.indexOf("\""));
551 * Trim whitespace from concatenated lines but preserve one space for valid
555 public String cleanLine(String l)
557 return l.trim() + " ";