/*
 * Provenance: added as utils/HelpLinksChecker.java by commit
 * b58f3fc059df3b857f9e7047b0b7fba578387399 (gmungoc, Mon, 30 May 2016),
 * "JAL-2119 help links checker added to /utils".
 */

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A class to check help file cross-references, and external URLs if internet
 * access is available.
 * <p>
 * The checker reads three files from the help folder (help.jhm, helpTOC.xml
 * and help.hs) plus every .html file under its html sub-folder, and prints
 * its findings to standard output. Parsing is line-based: only the first
 * occurrence of an attribute on a line is examined.
 *
 * @author gmcarstairs
 */
public class HelpLinksChecker
{
  private static final String HELP_HS = "help.hs";

  private static final String HELP_TOC_XML = "helpTOC.xml";

  private static final String HELP_JHM = "help.jhm";

  /** URL fetched once up front to decide whether external links can be checked */
  private static final String CONNECTIVITY_TEST_URL = "http://www.example.com";

  /** Connect and read timeout for external link checks, in milliseconds */
  private static final int TIMEOUT_MS = 10000;

  private static boolean internetAvailable = true;

  private int targetCount = 0;

  private int mapCount = 0;

  private int internalHrefCount = 0;

  private int anchorRefCount = 0;

  private int externalHrefCount = 0;

  private int invalidMapUrlCount = 0;

  private int invalidTargetCount = 0;

  private int invalidImageCount = 0;

  private int invalidInternalHrefCount = 0;

  private int invalidAnchorCount = 0;

  private int invalidExternalHrefCount = 0;

  /**
   * Command line entry point.
   *
   * @param args
   *          [0] path to the help folder, i.e. the folder that contains
   *          help.jhm, helpTOC.xml, help.hs and the html sub-folder; [1]
   *          (optional) -nointernet to suppress external link checking for a
   *          fast check of internal links only
   * @throws IOException
   *           if one of the help files cannot be read
   */
  public static void main(String[] args) throws IOException
  {
    if (args.length == 0 || args.length > 2
            || (args.length == 2 && !args[1].equals("-nointernet")))
    {
      System.out.println("Usage: <pathToHelpFolder> [-nointernet]");
      return;
    }

    // set explicitly (rather than only clearing) so that repeated calls in
    // one JVM do not inherit the previous run's setting
    internetAvailable = args.length == 1;

    new HelpLinksChecker().checkLinks(args[0]);
  }

  /**
   * Checks help links and reports results
   *
   * @param helpDirectoryPath
   *          path to the help folder
   * @throws IOException
   *           if one of the help files cannot be read
   */
  void checkLinks(String helpDirectoryPath) throws IOException
  {
    System.out.println("Checking help file links");
    File helpFolder = new File(helpDirectoryPath);
    if (!helpFolder.exists())
    {
      System.out.println("Can't find " + helpDirectoryPath);
      return;
    }

    /*
     * only probe the network when external checking has not been switched
     * off; a compound '&=' would evaluate the probe unconditionally
     */
    if (internetAvailable)
    {
      internetAvailable = connectToUrl(CONNECTIVITY_TEST_URL);
    }

    Map<String, String> tocTargets = checkHelpMappings(helpFolder);

    Map<String, String> unusedTargets = new HashMap<String, String>(
            tocTargets);

    checkTableOfContents(helpFolder, tocTargets, unusedTargets);

    checkHelpSet(helpFolder, tocTargets, unusedTargets);

    checkHtmlFolder(new File(helpFolder, "html"));

    reportResults(unusedTargets);
  }

  /**
   * Checks all html files in the given directory or its sub-directories
   *
   * @param folder
   * @throws IOException
   */
  private void checkHtmlFolder(File folder) throws IOException
  {
    File[] files = folder.listFiles();
    if (files == null)
    {
      // listFiles() returns null if folder is missing or not a directory
      System.out.println("Can't list folder " + folder.getPath());
      return;
    }
    for (File f : files)
    {
      if (f.isDirectory())
      {
        checkHtmlFolder(f);
      }
      else if (f.getAbsolutePath().endsWith(".html"))
      {
        checkHtmlFile(f, folder);
      }
    }
  }

  /**
   * Checks that any image attribute in help.hs is a valid target
   *
   * @param helpFolder
   * @param tocTargets
   * @param unusedTargets
   *          used targets are removed from here
   * @throws IOException
   */
  private void checkHelpSet(File helpFolder,
          Map<String, String> tocTargets, Map<String, String> unusedTargets)
          throws IOException
  {
    int lineNo = 0;
    for (String data : readLines(new File(helpFolder, HELP_HS)))
    {
      lineNo++;
      String image = getAttribute(data, "image");
      if (image != null)
      {
        unusedTargets.remove(image);
        if (!tocTargets.containsKey(image))
        {
          System.out.println(String.format(
                  "Invalid image '%s' at line %d of %s", image, lineNo,
                  HELP_HS));
          invalidImageCount++;
        }
      }
    }
  }

  /**
   * Print counts to sysout
   *
   * @param unusedTargets
   *          targets defined in help.jhm but never referenced
   */
  private void reportResults(Map<String, String> unusedTargets)
  {
    System.out.println("\nResults:");
    System.out.println(targetCount + " distinct help targets");
    System.out.println(mapCount + " help mappings");
    System.out.println(invalidTargetCount + " invalid targets");
    System.out.println(unusedTargets.size() + " unused targets");
    for (Map.Entry<String, String> unused : unusedTargets.entrySet())
    {
      System.out.println(String.format("    %s: %s", unused.getKey(),
              unused.getValue()));
    }
    System.out.println(invalidMapUrlCount + " invalid map urls");
    System.out.println(invalidImageCount + " invalid image attributes");
    System.out.println(String.format(
            "%d internal href links (%d with anchors)", internalHrefCount,
            anchorRefCount));
    System.out.println(invalidInternalHrefCount
            + " invalid internal href links");
    System.out.println(invalidAnchorCount + " invalid anchors");
    System.out.println(externalHrefCount + " external href links");
    if (internetAvailable)
    {
      System.out.println(invalidExternalHrefCount
              + " invalid external href links");
    }
    else
    {
      System.out
              .println("External links not verified as internet not available");
    }
  }

  /**
   * Reads the given html file and checks any href attribute values are either
   * <ul>
   * <li>a valid relative file path, or</li>
   * <li>a valid absolute URL (if external link checking is enabled)</li>
   * </ul>
   * For an internal link with a #anchor suffix whose file exists, the anchor
   * is also looked for in the linked file.
   *
   * @param htmlFile
   * @param htmlFolder
   *          the parent folder (for validation of relative paths)
   * @throws IOException
   */
  private void checkHtmlFile(File htmlFile, File htmlFolder)
          throws IOException
  {
    int lineNo = 0;
    for (String data : readLines(htmlFile))
    {
      lineNo++;
      String href = getAttribute(data, "href");
      if (href == null)
      {
        continue;
      }

      String anchor = null;
      int anchorPos = href.indexOf("#");
      if (anchorPos != -1)
      {
        anchor = href.substring(anchorPos + 1);
        href = href.substring(0, anchorPos);
      }

      boolean badLink = false;
      if (href.startsWith("http"))
      {
        externalHrefCount++;
        if (internetAvailable && !connectToUrl(href))
        {
          badLink = true;
          invalidExternalHrefCount++;
        }
      }
      else
      {
        internalHrefCount++;
        // an href of just "#anchor" refers to the file being read
        File hrefFile = href.equals("") ? htmlFile : new File(htmlFolder,
                href);
        if (!hrefFile.exists())
        {
          badLink = true;
          invalidInternalHrefCount++;
        }
        if (anchor != null)
        {
          anchorRefCount++;
          if (!badLink && !checkAnchorExists(hrefFile, anchor))
          {
            invalidAnchorCount++;
            System.out.println(String.format(
                    "Invalid anchor: %s at line %d of %s", anchor, lineNo,
                    getPath(htmlFile)));
          }
        }
      }

      if (badLink)
      {
        System.out.println(String.format(
                "Invalid href %s at line %d of %s", href, lineNo,
                getPath(htmlFile)));
      }
    }
  }

  /**
   * Reads the file and checks for the presence of the given html anchor
   * <p>
   * NOTE(review): the original body was lost from the copy of the patch this
   * was rebuilt from (only the start of a 'nameAnchor' string survived). This
   * version accepts a line containing name="anchor" or id="anchor" - confirm
   * against the upstream file.
   *
   * @param hrefFile
   * @param anchor
   * @return true if anchor is found else false (including when the file
   *         cannot be read)
   */
  private boolean checkAnchorExists(File hrefFile, String anchor)
  {
    String nameAnchor = "name=\"" + anchor + "\"";
    String idAnchor = "id=\"" + anchor + "\"";
    try
    {
      for (String data : readLines(hrefFile))
      {
        if (data.contains(nameAnchor) || data.contains(idAnchor))
        {
          return true;
        }
      }
    } catch (IOException e)
    {
      // e.g. the href resolved to a directory; treat as anchor not found
      return false;
    }
    return false;
  }

  /**
   * Returns the file's path for use in messages
   * <p>
   * NOTE(review): the original body is not present in the copy of the patch
   * this was rebuilt from; this version returns the path unmodified - confirm
   * against the upstream file.
   *
   * @param file
   * @return
   */
  private String getPath(File file)
  {
    return file.getPath();
  }

  /**
   * Answers true if content can be fetched from the given URL, or false if
   * the URL is malformed, the connection fails or times out, or the server
   * responds with an error
   * <p>
   * NOTE(review): the original body is not present in the copy of the patch
   * this was rebuilt from; reconstructed from its call sites and the file's
   * InputStream and URL imports - confirm against the upstream file.
   *
   * @param url
   * @return
   */
  private boolean connectToUrl(String url)
  {
    InputStream in = null;
    try
    {
      URLConnection connection = new URL(url).openConnection();
      // bounded waits so one dead host cannot stall the whole check
      connection.setConnectTimeout(TIMEOUT_MS);
      connection.setReadTimeout(TIMEOUT_MS);
      in = connection.getInputStream();
      return true;
    } catch (IOException e)
    {
      return false;
    } catch (RuntimeException e)
    {
      // e.g. IllegalArgumentException for a URL with no host
      return false;
    } finally
    {
      if (in != null)
      {
        try
        {
          in.close();
        } catch (IOException ignored)
        {
          // nothing useful to do if close fails
        }
      }
    }
  }

  /**
   * Reads file help.jhm and returns a map of target to url. Also reports
   * <ul>
   * <li>any target that is mapped more than once</li>
   * <li>any url attribute that is not a valid relative file link</li>
   * </ul>
   *
   * @param helpFolder
   * @return map of target to url, for lines that have both attributes
   * @throws IOException
   */
  private Map<String, String> checkHelpMappings(File helpFolder)
          throws IOException
  {
    Map<String, String> targets = new HashMap<String, String>();
    int lineNo = 0;
    for (String data : readLines(new File(helpFolder, HELP_JHM)))
    {
      lineNo++;

      /*
       * record target, check for duplicates
       */
      String target = getAttribute(data, "target");
      if (target != null)
      {
        mapCount++;
        if (targets.containsKey(target))
        {
          System.out.println(String.format(
                  "Duplicate target mapping to %s at line %d of %s",
                  target, lineNo, HELP_JHM));
        }
        else
        {
          targetCount++;
        }
      }

      /*
       * validate url
       */
      String url = getAttribute(data, "url");
      if (url != null)
      {
        if (target != null)
        {
          // a line with url but no target must not add a null key
          targets.put(target, url);
        }
        int anchorPos = url.indexOf("#");
        if (anchorPos != -1)
        {
          url = url.substring(0, anchorPos);
        }
        if (!new File(helpFolder, url).exists())
        {
          System.out.println(String.format(
                  "Invalid url path '%s' at line %d of %s", url, lineNo,
                  HELP_JHM));
          invalidMapUrlCount++;
        }
      }
    }
    return targets;
  }

  /**
   * Reads file helpTOC.xml and reports any invalid targets
   *
   * @param helpFolder
   * @param tocTargets
   * @param unusedTargets
   *          used targets are removed from this map
   * @throws IOException
   */
  private void checkTableOfContents(File helpFolder,
          Map<String, String> tocTargets, Map<String, String> unusedTargets)
          throws IOException
  {
    int lineNo = 0;
    for (String data : readLines(new File(helpFolder, HELP_TOC_XML)))
    {
      lineNo++;
      /*
       * assuming no more than one "target" per line of file here
       */
      String target = getAttribute(data, "target");
      if (target != null)
      {
        unusedTargets.remove(target);
        if (!tocTargets.containsKey(target))
        {
          System.out.println(String.format(
                  "Invalid target '%s' at line %d of %s", target, lineNo,
                  HELP_TOC_XML));
          invalidTargetCount++;
        }
      }
    }
  }

  /**
   * Reads all lines of the file, closing it even if reading fails
   *
   * @param file
   * @return the lines of the file, in order, without line terminators
   * @throws IOException
   */
  private static List<String> readLines(File file) throws IOException
  {
    List<String> lines = new ArrayList<String>();
    BufferedReader br = new BufferedReader(new FileReader(file));
    try
    {
      String line = br.readLine();
      while (line != null)
      {
        lines.add(line);
        line = br.readLine();
      }
    } finally
    {
      br.close();
    }
    return lines;
  }

  /**
   * Returns the value of the first occurrence of an attribute (written as
   * attName="value") if found in the data, else null. Also returns null if
   * the value has no closing quote on the same line.
   *
   * @param data
   *          one line of a file
   * @param attName
   * @return
   */
  private static String getAttribute(String data, String attName)
  {
    String match = attName + "=\"";
    int attPos = data.indexOf(match);
    if (attPos == -1)
    {
      return null;
    }

    /*
     * make a partial attempt at ignoring attributes within an html comment
     * (doesn't work if the comment started on an earlier line); a comment
     * opened but not closed on this line hides the rest of the line
     */
    int commentStartPos = data.indexOf("<!--");
    if (commentStartPos != -1 && attPos > commentStartPos)
    {
      int commentEndPos = data.indexOf("-->", commentStartPos + 4);
      if (commentEndPos == -1 || attPos < commentEndPos)
      {
        return null;
      }
    }

    int valueStart = attPos + match.length();
    int valueEnd = data.indexOf("\"", valueStart);
    if (valueEnd == -1)
    {
      return null;
    }
    return data.substring(valueStart, valueEnd);
  }
}