--- /dev/null
+
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A class to check help file cross-references, and external URLs if internet
+ * access is available
+ *
+ * @author gmcarstairs
+ *
+ */
+public class HelpLinksChecker
+{
+ private static final String HELP_HS = "help.hs";
+
+ private static final String HELP_TOC_XML = "helpTOC.xml";
+
+ private static final String HELP_JHM = "help.jhm";
+
+ private static boolean internetAvailable = true;
+
+ private int targetCount = 0;
+
+ private int mapCount = 0;
+
+ private int internalHrefCount = 0;
+
+ private int anchorRefCount = 0;
+
+ private int externalHrefCount = 0;
+
+ private int invalidMapUrlCount = 0;
+
+ private int invalidTargetCount = 0;
+
+ private int invalidImageCount = 0;
+
+ private int invalidInternalHrefCount = 0;
+
+ private int invalidExternalHrefCount = 0;
+
+ /**
+ * The only parameter should be a path to the root of the help directory in
+ * the workspace
+ *
+ * @param args
+ * [0] path to the /html folder in the workspace
+ * @param args
+ * [1] (optional) -nointernet to suppress external link checking for
+ * a fast check of internal links only
+ * @throws IOException
+ */
+ public static void main(String[] args) throws IOException
+ {
+ if (args.length == 0 || args.length > 2
+ || (args.length == 2 && !args[1].equals("-nointernet")))
+ {
+ System.out.println("Usage: <pathToHelpFolder> [-nointernet]");
+ return;
+ }
+
+ if (args.length == 2)
+ {
+ internetAvailable = false;
+ }
+
+ new HelpLinksChecker().checkLinks(args[0]);
+ }
+
+ /**
+ * Checks help links and reports results
+ *
+ * @param helpDirectoryPath
+ * @throws IOException
+ */
+ void checkLinks(String helpDirectoryPath) throws IOException
+ {
+ System.out.println("Checking help file links");
+ File helpFolder = new File(helpDirectoryPath);
+ if (!helpFolder.exists())
+ {
+ System.out.println("Can't find " + helpDirectoryPath);
+ return;
+ }
+
+ internetAvailable &= connectToUrl("http://www.example.com");
+
+ Map<String, String> tocTargets = checkHelpMappings(helpFolder);
+
+ Map<String, String> unusedTargets = new HashMap<String, String>(
+ tocTargets);
+
+ checkTableOfContents(helpFolder, tocTargets, unusedTargets);
+
+ checkHelpSet(helpFolder, tocTargets, unusedTargets);
+
+ checkHtmlFolder(new File(helpFolder, "html"));
+
+ reportResults(unusedTargets);
+ }
+
+ /**
+ * Checks all html files in the given directory or its sub-directories
+ *
+ * @param folder
+ * @throws IOException
+ */
+ private void checkHtmlFolder(File folder) throws IOException
+ {
+ File[] files = folder.listFiles();
+ for (File f : files)
+ {
+ if (f.isDirectory())
+ {
+ checkHtmlFolder(f);
+ }
+ else
+ {
+ if (f.getAbsolutePath().endsWith(".html"))
+ {
+ checkHtmlFile(f, folder);
+ }
+ }
+ }
+ }
+
+ /**
+ * Checks that any image attribute in help.hs is a valid target
+ *
+ * @param helpFolder
+ * @param tocTargets
+ * @param unusedTargets
+ * used targets are removed from here
+ */
+ private void checkHelpSet(File helpFolder,
+ Map<String, String> tocTargets, Map<String, String> unusedTargets)
+ throws IOException
+ {
+ BufferedReader br = new BufferedReader(new FileReader(new File(
+ helpFolder, HELP_HS)));
+ String data = br.readLine();
+ int lineNo = 0;
+
+ while (data != null)
+ {
+ lineNo++;
+ String image = getAttribute(data, "image");
+ if (image != null)
+ {
+ unusedTargets.remove(image);
+ if (!tocTargets.containsKey(image))
+ {
+ System.out.println(String.format(
+ "Invalid image '%s' at line %d of %s", image, lineNo,
+ HELP_HS));
+ invalidImageCount++;
+ }
+ }
+ data = br.readLine();
+ }
+ br.close();
+ }
+
+ /**
+ * Print counts to sysout
+ *
+ * @param unusedTargets
+ */
+ private void reportResults(Map<String, String> unusedTargets)
+ {
+ System.out.println("\nResults:");
+ System.out.println(targetCount + " distinct help targets");
+ System.out.println(mapCount + " help mappings");
+ System.out.println(invalidTargetCount + " invalid targets");
+ System.out.println(unusedTargets.size() + " unused targets");
+ for (String target : unusedTargets.keySet())
+ {
+ System.out.println(String.format(" %s: %s", target,
+ unusedTargets.get(target)));
+ }
+ System.out.println(invalidMapUrlCount + " invalid map urls");
+ System.out.println(invalidImageCount + " invalid image attributes");
+ System.out.println(String.format(
+ "%d internal href links (%d with anchors - not checked)",
+ internalHrefCount, anchorRefCount));
+ System.out.println(invalidInternalHrefCount
+ + " invalid internal href links");
+ System.out.println(externalHrefCount + " external href links");
+ if (internetAvailable)
+ {
+ System.out.println(invalidExternalHrefCount
+ + " invalid external href links");
+ }
+ else
+ {
+ System.out
+ .println("External links not verified as internet not available");
+ }
+
+ }
+
+ /**
+ * Reads the given html file and checks any href attibute values are either
+ * <ul>
+ * <li>a valid relative file path, or</li>
+ * <li>a valid absolute URL (if external link checking is enabled)</li>
+ * </ul>
+ *
+ * @param htmlFile
+ * @param htmlFolder
+ * the parent folder (for validation of relative paths)
+ */
+ private void checkHtmlFile(File htmlFile, File htmlFolder)
+ throws IOException
+ {
+ BufferedReader br = new BufferedReader(new FileReader(htmlFile));
+ String data = br.readLine();
+ int lineNo = 0;
+ while (data != null)
+ {
+ lineNo++;
+ String href = getAttribute(data, "href");
+ if (href != null)
+ {
+ String anchor = null;
+ int anchorPos = href.indexOf("#");
+ if (anchorPos != -1)
+ {
+ anchor = href.substring(anchorPos + 1);
+ href = href.substring(0, anchorPos);
+ }
+ boolean badLink = false;
+ if (href.startsWith("http"))
+ {
+ externalHrefCount++;
+ if (internetAvailable)
+ {
+ if (!connectToUrl(href))
+ {
+ badLink = true;
+ invalidExternalHrefCount++;
+ }
+ }
+ }
+ else
+ {
+ internalHrefCount++;
+ File hrefFile = href.equals("") ? htmlFile : new File(htmlFolder,
+ href);
+ if (!hrefFile.exists())
+ {
+ badLink = true;
+ invalidInternalHrefCount++;
+ }
+ if (anchor != null)
+ {
+ anchorRefCount++;
+ if (!badLink)
+ {
+ if (!checkAnchorExists(hrefFile, anchor))
+ {
+ System.out.println(String.format(
+ "Invalid anchor: %s at line %d of %s", anchor,
+ lineNo, getPath(htmlFile)));
+ }
+ }
+ }
+ }
+ if (badLink)
+ {
+ System.out.println(String.format(
+ "Invalid href %s at line %d of %s", href, lineNo,
+ getPath(htmlFile)));
+ }
+ }
+ data = br.readLine();
+ }
+ br.close();
+ }
+
+ /**
+ * Reads the file and checks for the presence of the given html anchor
+ *
+ * @param hrefFile
+ * @param anchor
+ * @return true if anchor is found else false
+ */
+ private boolean checkAnchorExists(File hrefFile, String anchor)
+ {
+ String nameAnchor = "<a name=\"" + anchor + "\"";
+ String idAnchor = "<a id=\"" + anchor + "\"";
+ boolean found = false;
+ try
+ {
+ BufferedReader br = new BufferedReader(new FileReader(hrefFile));
+ String data = br.readLine();
+ while (data != null)
+ {
+ if (data.contains(nameAnchor) || data.contains(idAnchor))
+ {
+ found = true;
+ break;
+ }
+ data = br.readLine();
+ }
+ br.close();
+ } catch (IOException e)
+ {
+ // ignore
+ }
+ return found;
+ }
+
+ /**
+ * Returns the part of the file path starting from /help/
+ *
+ * @param helpFile
+ * @return
+ */
+ private String getPath(File helpFile)
+ {
+ String path = helpFile.getPath();
+ int helpPos = path.indexOf("/help/");
+ return helpPos == -1 ? path : path.substring(helpPos);
+ }
+
+ /**
+ * Returns true if the URL returns an input stream, or false if the URL
+ * returns an error code or we cannot connect to it (e.g. no internet
+ * available)
+ *
+ * @param url
+ * @return
+ */
+ private boolean connectToUrl(String url)
+ {
+ try
+ {
+ URL u = new URL(url);
+ InputStream connection = u.openStream();
+ connection.close();
+ return true;
+ } catch (Throwable t)
+ {
+ return false;
+ }
+ }
+
+ /**
+ * Reads file help.jhm and checks that
+ * <ul>
+ * <li>each target attribute is in tocTargets</li>
+ * <li>each url attribute is a valid relative file link</li>
+ * </ul>
+ *
+ * @param helpFolder
+ */
+ private Map<String, String> checkHelpMappings(File helpFolder)
+ throws IOException
+ {
+ Map<String, String> targets = new HashMap<String, String>();
+ BufferedReader br = new BufferedReader(new FileReader(new File(
+ helpFolder, HELP_JHM)));
+ String data = br.readLine();
+ int lineNo = 0;
+ while (data != null)
+ {
+ lineNo++;
+
+ /*
+ * record target, check for duplicates
+ */
+ String target = getAttribute(data, "target");
+ if (target != null)
+ {
+ mapCount++;
+ if (targets.containsKey(target))
+ {
+ System.out.println(String.format(
+ "Duplicate target mapping to %s at line %d of %s",
+ target, lineNo, HELP_JHM));
+ }
+ else
+ {
+ targetCount++;
+ }
+ }
+
+ /*
+ * validate url
+ */
+ String url = getAttribute(data, "url");
+ if (url != null)
+ {
+ targets.put(target, url);
+ int anchorPos = url.indexOf("#");
+ if (anchorPos != -1)
+ {
+ url = url.substring(0, anchorPos);
+ }
+ if (!new File(helpFolder, url).exists())
+ {
+ System.out.println(String.format(
+ "Invalid url path '%s' at line %d of %s", url, lineNo,
+ HELP_JHM));
+ invalidMapUrlCount++;
+ }
+ }
+ data = br.readLine();
+ }
+ br.close();
+ return targets;
+ }
+
+ /**
+ * Reads file helpTOC.xml and reports any invalid targets
+ *
+ * @param helpFolder
+ * @param tocTargets
+ * @param unusedTargets
+ * used targets are removed from this map
+ *
+ * @return
+ * @throws IOException
+ */
+ private void checkTableOfContents(File helpFolder,
+ Map<String, String> tocTargets, Map<String, String> unusedTargets)
+ throws IOException
+ {
+ BufferedReader br = new BufferedReader(new FileReader(new File(
+ helpFolder, HELP_TOC_XML)));
+ String data = br.readLine();
+ int lineNo = 0;
+ while (data != null)
+ {
+ lineNo++;
+ /*
+ * assuming no more than one "target" per line of file here
+ */
+ String target = getAttribute(data, "target");
+ if (target != null)
+ {
+ unusedTargets.remove(target);
+ if (!tocTargets.containsKey(target))
+ {
+ System.out.println(String.format(
+ "Invalid target '%s' at line %d of %s", target, lineNo,
+ HELP_TOC_XML));
+ invalidTargetCount++;
+ }
+ }
+ data = br.readLine();
+ }
+ br.close();
+ }
+
+ /**
+ * Returns the value of an attribute if found in the data, else null
+ *
+ * @param data
+ * @param attName
+ * @return
+ */
+ private static String getAttribute(String data, String attName)
+ {
+ /*
+ * make a partial attempt at ignoring within <!-- html comments -->
+ * (doesn't work if multi-line)
+ */
+ int commentStartPos = data.indexOf("<!--");
+ int commentEndPos = commentStartPos == -1 ? -1 : data.substring(
+ commentStartPos + 4).indexOf("-->");
+ String value = null;
+ String match = attName + "=\"";
+ int attPos = data.indexOf(match);
+ if (attPos > 0
+ && (commentStartPos == -1 || attPos < commentStartPos || attPos > commentEndPos))
+ {
+ data = data.substring(attPos + match.length());
+ value = data.substring(0, data.indexOf("\""));
+ }
+ return value;
+ }
+}