3 import java.io.BufferedReader;
5 import java.io.FileReader;
6 import java.io.IOException;
7 import java.io.InputStream;
9 import java.util.HashMap;
13 * A class to check help file cross-references, and external URLs if internet
19 public class HelpLinksChecker
21 private static final String HELP_HS = "help.hs";
23 private static final String HELP_TOC_XML = "helpTOC.xml";
25 private static final String HELP_JHM = "help.jhm";
27 private static boolean internetAvailable = true;
29 private int targetCount = 0;
31 private int mapCount = 0;
33 private int internalHrefCount = 0;
35 private int anchorRefCount = 0;
37 private int invalidAnchorRefCount = 0;
39 private int externalHrefCount = 0;
41 private int invalidMapUrlCount = 0;
43 private int invalidTargetCount = 0;
45 private int invalidImageCount = 0;
47 private int invalidInternalHrefCount = 0;
49 private int invalidExternalHrefCount = 0;
52 * The only parameter should be a path to the root of the help directory in
56 * [0] path to the /html folder in the workspace
58 * [1] (optional) -nointernet to suppress external link checking for
59 * a fast check of internal links only
62 public static void main(String[] args) throws IOException
64 if (args.length == 0 || args.length > 2
65 || (args.length == 2 && !args[1].equals("-nointernet")))
67 log("Usage: <pathToHelpFolder> [-nointernet]");
73 internetAvailable = false;
76 new HelpLinksChecker().checkLinks(args[0]);
80 * Checks help links and reports results
82 * @param helpDirectoryPath
85 void checkLinks(String helpDirectoryPath) throws IOException
87 log("Checking help file links");
88 File helpFolder = new File(helpDirectoryPath).getCanonicalFile();
89 if (!helpFolder.exists())
91 log("Can't find " + helpDirectoryPath);
95 internetAvailable &= connectToUrl("http://www.example.org");
97 Map<String, String> tocTargets = checkHelpMappings(helpFolder);
99 Map<String, String> unusedTargets = new HashMap<String, String>(
102 checkTableOfContents(helpFolder, tocTargets, unusedTargets);
104 checkHelpSet(helpFolder, tocTargets, unusedTargets);
106 checkHtmlFolder(new File(helpFolder, "html"));
108 reportResults(unusedTargets);
112 * Checks all html files in the given directory or its sub-directories
115 * @throws IOException
117 private void checkHtmlFolder(File folder) throws IOException
119 File[] files = folder.listFiles();
128 if (f.getAbsolutePath().endsWith(".html"))
130 checkHtmlFile(f, folder);
137 * Checks that any image attribute in help.hs is a valid target
141 * @param unusedTargets
142 * used targets are removed from here
144 private void checkHelpSet(File helpFolder,
145 Map<String, String> tocTargets, Map<String, String> unusedTargets)
148 BufferedReader br = new BufferedReader(new FileReader(new File(
149 helpFolder, HELP_HS)));
150 String data = br.readLine();
156 String image = getAttribute(data, "image");
159 unusedTargets.remove(image);
160 if (!tocTargets.containsKey(image))
163 "Invalid image '%s' at line %d of %s", image, lineNo,
168 data = br.readLine();
174 * Print counts to sysout
176 * @param unusedTargets
178 private void reportResults(Map<String, String> unusedTargets)
// Logs each counter gathered during the run, lists the targets left in
// unusedTargets, and ends with a pass/fail line.
181 log(targetCount + " distinct help targets");
182 log(mapCount + " help mappings");
183 log(invalidTargetCount + " invalid targets");
184 log(unusedTargets.size() + " unused targets");
// one line per remaining map entry, formatted as "key: value"
185 for (String target : unusedTargets.keySet())
187 log(String.format(" %s: %s", target,
188 unusedTargets.get(target)));
190 log(invalidMapUrlCount + " invalid map urls");
191 log(invalidImageCount + " invalid image attributes");
193 "%d internal href links (%d with anchors)", internalHrefCount,
195 log(invalidInternalHrefCount
196 + " invalid internal href links");
197 log(invalidAnchorRefCount
198 + " invalid internal anchor links");
199 log(externalHrefCount + " external href links");
// the invalid external link count is only reported when external links
// were actually verified
200 if (internetAvailable)
202 log(invalidExternalHrefCount
203 + " invalid external href links");
208 .println("External links not verified as internet not available");
// Overall verdict, based on invalid internal/external hrefs, images and
// anchors.
// NOTE(review): invalidTargetCount and invalidMapUrlCount are reported above
// but are not part of this condition - confirm that is intended.
210 if (invalidInternalHrefCount > 0 || invalidExternalHrefCount > 0
211 || invalidImageCount > 0 || invalidAnchorRefCount > 0)
213 log("*** Failed ***");
216 log("*** Success ***");
222 static void log(String s)
224 System.out.println(s);
228 * Reads the given html file and checks any href attibute values are either
230 * <li>a valid relative file path, or</li>
231 * <li>a valid absolute URL (if external link checking is enabled)</li>
236 * the parent folder (for validation of relative paths)
238 private void checkHtmlFile(File htmlFile, File htmlFolder)
241 BufferedReader br = new BufferedReader(new FileReader(htmlFile));
242 String data = br.readLine();
247 String href = getAttribute(data, "href");
250 String anchor = null;
251 int anchorPos = href.indexOf("#");
254 anchor = href.substring(anchorPos + 1);
255 href = href.substring(0, anchorPos);
257 boolean badLink = false;
258 if (href.startsWith("http"))
261 if (internetAvailable)
263 if (!connectToUrl(href))
266 invalidExternalHrefCount++;
273 File hrefFile = href.equals("") ? htmlFile : new File(htmlFolder,
275 if (hrefFile != htmlFile && !fileExists(hrefFile, href))
278 invalidInternalHrefCount++;
285 if (!checkAnchorExists(hrefFile, anchor))
288 "Invalid anchor: %s at line %d of %s", anchor,
289 lineNo, getPath(htmlFile)));
290 invalidAnchorRefCount++;
298 "Invalid href %s at line %d of %s", href, lineNo,
302 data = br.readLine();
308 * Performs a case-sensitive check that the href'd file exists
312 * @throws IOException
314 boolean fileExists(File hrefFile, String href) throws IOException
316 if (!hrefFile.exists())
322 * On Mac or Windows, file.exists() is not case sensitive, so do an
323 * additional check with case sensitivity
325 int slashPos = href.lastIndexOf(File.separator);
326 String expectedFileName = slashPos == -1 ? href : href
327 .substring(slashPos + 1);
328 String cp = hrefFile.getCanonicalPath();
329 slashPos = cp.lastIndexOf(File.separator);
330 String actualFileName = slashPos == -1 ? cp : cp
331 .substring(slashPos + 1);
333 return expectedFileName.equals(actualFileName);
337 * Reads the file and checks for the presence of the given html anchor
341 * @return true if anchor is found else false
343 private boolean checkAnchorExists(File hrefFile, String anchor)
345 String nameAnchor = "<a name=\"" + anchor + "\"";
346 String idAnchor = "<a id=\"" + anchor + "\"";
347 boolean found = false;
350 BufferedReader br = new BufferedReader(new FileReader(hrefFile));
351 String data = br.readLine();
354 if (data.contains(nameAnchor) || data.contains(idAnchor))
359 data = br.readLine();
362 } catch (IOException e)
370 * Returns the part of the file path starting from /help/
375 private String getPath(File helpFile)
377 String path = helpFile.getPath();
378 int helpPos = path.indexOf("/help/");
379 return helpPos == -1 ? path : path.substring(helpPos);
383 * Returns true if the URL returns an input stream, or false if the URL
384 * returns an error code or we cannot connect to it (e.g. no internet
390 private boolean connectToUrl(String url)
394 URL u = new URL(url);
395 InputStream connection = u.openStream();
398 } catch (Throwable t)
405 * Reads file help.jhm and checks that
407 * <li>each target attribute is in tocTargets</li>
408 * <li>each url attribute is a valid relative file link</li>
413 private Map<String, String> checkHelpMappings(File helpFolder)
416 Map<String, String> targets = new HashMap<String, String>();
417 BufferedReader br = new BufferedReader(new FileReader(new File(
418 helpFolder, HELP_JHM)));
419 String data = br.readLine();
426 * record target, check for duplicates
428 String target = getAttribute(data, "target");
432 if (targets.containsKey(target))
435 "Duplicate target mapping to %s at line %d of %s",
436 target, lineNo, HELP_JHM));
447 String url = getAttribute(data, "url");
450 targets.put(target, url);
451 int anchorPos = url.indexOf("#");
454 url = url.substring(0, anchorPos);
456 if (!new File(helpFolder, url).exists())
459 "Invalid url path '%s' at line %d of %s", url, lineNo,
461 invalidMapUrlCount++;
464 data = br.readLine();
471 * Reads file helpTOC.xml and reports any invalid targets
475 * @param unusedTargets
476 * used targets are removed from this map
479 * @throws IOException
481 private void checkTableOfContents(File helpFolder,
482 Map<String, String> tocTargets, Map<String, String> unusedTargets)
485 BufferedReader br = new BufferedReader(new FileReader(new File(
486 helpFolder, HELP_TOC_XML)));
487 String data = br.readLine();
493 * assuming no more than one "target" per line of file here
495 String target = getAttribute(data, "target");
498 unusedTargets.remove(target);
499 if (!tocTargets.containsKey(target))
502 "Invalid target '%s' at line %d of %s", target, lineNo,
504 invalidTargetCount++;
507 data = br.readLine();
513 * Returns the value of an attribute if found in the data, else null
519 private static String getAttribute(String data, String attName)
522 * make a partial attempt at ignoring within <!-- html comments -->
523 * (doesn't work if multi-line)
525 int commentStartPos = data.indexOf("<!--");
526 int commentEndPos = commentStartPos == -1 ? -1 : data.substring(
527 commentStartPos + 4).indexOf("-->");
529 String match = attName + "=\"";
530 int attPos = data.indexOf(match);
532 && (commentStartPos == -1 || attPos < commentStartPos || attPos > commentEndPos))
534 data = data.substring(attPos + match.length());
535 value = data.substring(0, data.indexOf("\""));