import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A class to check help file cross-references, and external URLs if internet
 * access is available.
 * <p>
 * The help folder is expected to contain the files help.jhm (target to url
 * mappings), helpTOC.xml (table of contents) and help.hs (help set), plus an
 * "html" sub-folder holding the help pages. Problems are printed to sysout as
 * they are found, followed by a summary of counts.
 * <p>
 * Note that the "internet available" flag is static, so it is shared by all
 * instances and is never reset to true once cleared.
 */
public class HelpLinksChecker
{
  private static final String HELP_HS = "help.hs";

  private static final String HELP_TOC_XML = "helpTOC.xml";

  private static final String HELP_JHM = "help.jhm";

  /*
   * URL probed once to decide whether external links can be verified
   */
  private static final String PROBE_URL = "http://www.example.com";

  /*
   * limits (in milliseconds) on how long a single URL check may block
   */
  private static final int CONNECT_TIMEOUT_MS = 10000;

  private static final int READ_TIMEOUT_MS = 10000;

  private static boolean internetAvailable = true;

  private int targetCount = 0;

  private int mapCount = 0;

  private int internalHrefCount = 0;

  private int anchorRefCount = 0;

  private int externalHrefCount = 0;

  private int invalidMapUrlCount = 0;

  private int invalidTargetCount = 0;

  private int invalidImageCount = 0;

  private int invalidInternalHrefCount = 0;

  private int invalidExternalHrefCount = 0;

  /**
   * Runs the checker. Prints a usage message and returns if the arguments are
   * not as described.
   * 
   * @param args
   *          [0] path to the help folder, i.e. the folder that contains
   *          help.jhm, helpTOC.xml, help.hs and the html sub-folder
   *          <p>
   *          [1] (optional) -nointernet to suppress external link checking for
   *          a fast check of internal links only
   * @throws IOException
   *           if one of the help files cannot be read
   */
  public static void main(String[] args) throws IOException
  {
    if (args.length == 0 || args.length > 2
            || (args.length == 2 && !args[1].equals("-nointernet")))
    {
      System.out.println("Usage: <pathToHelpFolder> [-nointernet]");
      return;
    }

    if (args.length == 2)
    {
      internetAvailable = false;
    }

    new HelpLinksChecker().checkLinks(args[0]);
  }

  /**
   * Checks help links and reports results
   * 
   * @param helpDirectoryPath
   *          path to the help folder
   * @throws IOException
   *           if one of the help files cannot be read
   */
  void checkLinks(String helpDirectoryPath) throws IOException
  {
    System.out.println("Checking help file links");
    File helpFolder = new File(helpDirectoryPath);
    if (!helpFolder.exists())
    {
      System.out.println("Can't find " + helpDirectoryPath);
      return;
    }

    /*
     * only probe the network if external checking has not already been
     * switched off (a compound '&=' would evaluate the probe regardless)
     */
    if (internetAvailable)
    {
      internetAvailable = connectToUrl(PROBE_URL);
    }

    Map<String, String> tocTargets = checkHelpMappings(helpFolder);

    Map<String, String> unusedTargets = new HashMap<String, String>(
            tocTargets);

    checkTableOfContents(helpFolder, tocTargets, unusedTargets);

    checkHelpSet(helpFolder, tocTargets, unusedTargets);

    checkHtmlFolder(new File(helpFolder, "html"));

    reportResults(unusedTargets);
  }

  /**
   * Checks all html files in the given directory or its sub-directories
   * 
   * @param folder
   *          the directory to scan
   * @throws IOException
   *           if an html file cannot be read
   */
  private void checkHtmlFolder(File folder) throws IOException
  {
    File[] files = folder.listFiles();
    if (files == null)
    {
      /*
       * listFiles returns null if the path is not a directory or cannot be
       * read; report it rather than fail with a NullPointerException
       */
      System.out.println("Can't list files in " + folder.getPath());
      return;
    }
    for (File f : files)
    {
      if (f.isDirectory())
      {
        checkHtmlFolder(f);
      }
      else if (f.getAbsolutePath().endsWith(".html"))
      {
        checkHtmlFile(f, folder);
      }
    }
  }

  /**
   * Checks that any image attribute in help.hs is a valid target
   * 
   * @param helpFolder
   *          the folder containing help.hs
   * @param tocTargets
   *          the known targets (as read from help.jhm)
   * @param unusedTargets
   *          used targets are removed from here
   * @throws IOException
   *           if help.hs cannot be read
   */
  private void checkHelpSet(File helpFolder,
          Map<String, String> tocTargets, Map<String, String> unusedTargets)
          throws IOException
  {
    int lineNo = 0;
    for (String data : readLines(new File(helpFolder, HELP_HS)))
    {
      lineNo++;
      String image = getAttribute(data, "image");
      if (image != null)
      {
        unusedTargets.remove(image);
        if (!tocTargets.containsKey(image))
        {
          System.out.println(String.format(
                  "Invalid image '%s' at line %d of %s", image, lineNo,
                  HELP_HS));
          invalidImageCount++;
        }
      }
    }
  }

  /**
   * Print counts to sysout
   * 
   * @param unusedTargets
   *          targets (and their urls) that were mapped but never referenced
   */
  private void reportResults(Map<String, String> unusedTargets)
  {
    System.out.println("\nResults:");
    System.out.println(targetCount + " distinct help targets");
    System.out.println(mapCount + " help mappings");
    System.out.println(invalidTargetCount + " invalid targets");
    System.out.println(unusedTargets.size() + " unused targets");
    for (Map.Entry<String, String> unused : unusedTargets.entrySet())
    {
      System.out.println(String.format(" %s: %s", unused.getKey(),
              unused.getValue()));
    }
    System.out.println(invalidMapUrlCount + " invalid map urls");
    System.out.println(invalidImageCount + " invalid image attributes");
    System.out.println(String.format(
            "%d internal href links (%d with anchors - not checked)",
            internalHrefCount, anchorRefCount));
    System.out.println(invalidInternalHrefCount
            + " invalid internal href links");
    System.out.println(externalHrefCount + " external href links");
    if (internetAvailable)
    {
      System.out.println(invalidExternalHrefCount
              + " invalid external href links");
    }
    else
    {
      System.out
              .println("External links not verified as internet not available");
    }
  }

  /**
   * Reads the given html file and checks any href attribute values are either
   * <ul>
   * <li>a valid relative file path, or</li>
   * <li>a valid absolute URL (if external link checking is enabled)</li>
   * </ul>
   * Only the first href on each line is inspected. An href that starts with
   * "http" is treated as external; anything else is resolved as a file
   * relative to the parent folder. If the href has an anchor and its file
   * exists, the file is searched for the anchor.
   * 
   * @param htmlFile
   *          the file to check
   * @param htmlFolder
   *          the parent folder (for validation of relative paths)
   * @throws IOException
   *           if the file cannot be read
   */
  private void checkHtmlFile(File htmlFile, File htmlFolder)
          throws IOException
  {
    int lineNo = 0;
    for (String data : readLines(htmlFile))
    {
      lineNo++;
      String href = getAttribute(data, "href");
      if (href == null)
      {
        continue;
      }

      String anchor = null;
      int anchorPos = href.indexOf("#");
      if (anchorPos != -1)
      {
        anchor = href.substring(anchorPos + 1);
        href = href.substring(0, anchorPos);
      }

      boolean badLink = false;
      if (href.startsWith("http"))
      {
        externalHrefCount++;
        if (internetAvailable && !connectToUrl(href))
        {
          badLink = true;
          invalidExternalHrefCount++;
        }
      }
      else
      {
        internalHrefCount++;
        /*
         * an empty path means a link to an anchor within this same file
         */
        File hrefFile = href.equals("") ? htmlFile : new File(htmlFolder,
                href);
        if (!hrefFile.exists())
        {
          badLink = true;
          invalidInternalHrefCount++;
        }
        if (anchor != null)
        {
          anchorRefCount++;
          if (!badLink && !checkAnchorExists(hrefFile, anchor))
          {
            System.out.println(String.format(
                    "Invalid anchor: %s at line %d of %s", anchor,
                    lineNo, getPath(htmlFile)));
          }
        }
      }

      if (badLink)
      {
        System.out.println(String.format(
                "Invalid href %s at line %d of %s", href, lineNo,
                getPath(htmlFile)));
      }
    }
  }

  /**
   * Reads the file and checks for the presence of the given html anchor, as
   * either an 'a name' or an 'a id' attribute
   * 
   * @param hrefFile
   *          the file to search
   * @param anchor
   *          the anchor name to look for
   * @return true if anchor is found else false (including if the file cannot
   *         be read)
   */
  private boolean checkAnchorExists(File hrefFile, String anchor)
  {
    String nameAnchor = "<a name=\"" + anchor + "\"";
    String idAnchor = "<a id=\"" + anchor + "\"";
    try
    {
      for (String data : readLines(hrefFile))
      {
        if (data.contains(nameAnchor) || data.contains(idAnchor))
        {
          return true;
        }
      }
    } catch (IOException e)
    {
      // deliberately ignored: an unreadable file is reported as 'not found'
    }
    return false;
  }

  /**
   * Returns the part of the file path starting from /help/, or the whole path
   * if it does not contain /help/
   * 
   * @param helpFile
   * @return
   */
  private String getPath(File helpFile)
  {
    String path = helpFile.getPath();
    int helpPos = path.indexOf("/help/");
    return helpPos == -1 ? path : path.substring(helpPos);
  }

  /**
   * Returns true if the URL returns an input stream, or false if the URL
   * returns an error code or we cannot connect to it (e.g. no internet
   * available, or no response within the timeout)
   * 
   * @param url
   *          the URL to try
   * @return
   */
  private boolean connectToUrl(String url)
  {
    InputStream connection = null;
    try
    {
      URLConnection conn = new URL(url).openConnection();
      /*
       * without timeouts a single unresponsive host could stall the checker
       * indefinitely
       */
      conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
      conn.setReadTimeout(READ_TIMEOUT_MS);
      connection = conn.getInputStream();
      return true;
    } catch (Exception e)
    {
      /*
       * malformed URL, unknown host, http error status, timeout etc;
       * Errors (e.g. OutOfMemoryError) are deliberately not caught
       */
      return false;
    } finally
    {
      if (connection != null)
      {
        try
        {
          connection.close();
        } catch (IOException e)
        {
          // ignore - the link has already been classified
        }
      }
    }
  }

  /**
   * Reads file help.jhm and checks that
   * <ul>
   * <li>each target attribute is defined only once</li>
   * <li>each url attribute is a valid relative file link</li>
   * </ul>
   * 
   * @param helpFolder
   *          the folder containing help.jhm
   * @return a map of target to url, as read from the file
   * @throws IOException
   *           if help.jhm cannot be read
   */
  private Map<String, String> checkHelpMappings(File helpFolder)
          throws IOException
  {
    Map<String, String> targets = new HashMap<String, String>();
    int lineNo = 0;
    for (String data : readLines(new File(helpFolder, HELP_JHM)))
    {
      lineNo++;

      /*
       * record target, check for duplicates
       */
      String target = getAttribute(data, "target");
      if (target != null)
      {
        mapCount++;
        if (targets.containsKey(target))
        {
          System.out.println(String.format(
                  "Duplicate target mapping to %s at line %d of %s",
                  target, lineNo, HELP_JHM));
        }
        else
        {
          targetCount++;
        }
      }

      /*
       * validate url
       */
      String url = getAttribute(data, "url");
      if (url != null)
      {
        /*
         * a url with no target on the same line is still validated, but is
         * not recorded (it would otherwise be stored under a null key)
         */
        if (target != null)
        {
          targets.put(target, url);
        }
        int anchorPos = url.indexOf("#");
        if (anchorPos != -1)
        {
          url = url.substring(0, anchorPos);
        }
        if (!new File(helpFolder, url).exists())
        {
          System.out.println(String.format(
                  "Invalid url path '%s' at line %d of %s", url, lineNo,
                  HELP_JHM));
          invalidMapUrlCount++;
        }
      }
    }
    return targets;
  }

  /**
   * Reads file helpTOC.xml and reports any invalid targets
   * 
   * @param helpFolder
   *          the folder containing helpTOC.xml
   * @param tocTargets
   *          the known targets (as read from help.jhm)
   * @param unusedTargets
   *          used targets are removed from this map
   * @throws IOException
   *           if helpTOC.xml cannot be read
   */
  private void checkTableOfContents(File helpFolder,
          Map<String, String> tocTargets, Map<String, String> unusedTargets)
          throws IOException
  {
    int lineNo = 0;
    for (String data : readLines(new File(helpFolder, HELP_TOC_XML)))
    {
      lineNo++;
      /*
       * assuming no more than one "target" per line of file here
       */
      String target = getAttribute(data, "target");
      if (target != null)
      {
        unusedTargets.remove(target);
        if (!tocTargets.containsKey(target))
        {
          System.out.println(String.format(
                  "Invalid target '%s' at line %d of %s", target, lineNo,
                  HELP_TOC_XML));
          invalidTargetCount++;
        }
      }
    }
  }

  /**
   * Reads all lines of the given file, closing it again whether or not the
   * read succeeds
   * 
   * @param file
   *          the file to read
   * @return the lines of the file, in order, without line terminators
   * @throws IOException
   *           if the file cannot be opened or read
   */
  private static List<String> readLines(File file) throws IOException
  {
    List<String> lines = new ArrayList<String>();
    BufferedReader br = new BufferedReader(new FileReader(file));
    try
    {
      String data = br.readLine();
      while (data != null)
      {
        lines.add(data);
        data = br.readLine();
      }
    } finally
    {
      br.close();
    }
    return lines;
  }

  /**
   * Returns the value of an attribute if found in the data, else null. Only
   * the first occurrence of attName="..." in the line is considered. Null is
   * also returned if that occurrence has no closing quote on the line, or lies
   * within an html comment.
   * 
   * @param data
   *          one line of text
   * @param attName
   *          the attribute name
   * @return the text between the quotes, or null
   */
  private static String getAttribute(String data, String attName)
  {
    String match = attName + "=\"";
    int attPos = data.indexOf(match);
    if (attPos == -1)
    {
      return null;
    }

    /*
     * make a partial attempt at ignoring within <!-- html comments -->
     * (only the first comment on the line is considered, and continuation
     * lines of a multi-line comment are not recognised)
     */
    int commentStartPos = data.indexOf("<!--");
    if (commentStartPos != -1 && attPos > commentStartPos)
    {
      /*
       * search from the comment start so that the end position is an index
       * into the whole line, comparable with attPos; a comment not closed on
       * this line extends to the end of the line
       */
      int commentEndPos = data.indexOf("-->", commentStartPos + 4);
      if (commentEndPos == -1 || attPos < commentEndPos)
      {
        return null;
      }
    }

    int valueStart = attPos + match.length();
    int valueEnd = data.indexOf('"', valueStart);
    return valueEnd == -1 ? null : data.substring(valueStart, valueEnd);
  }
}