#!/bin/sh -e
#
# # DASHT-QUERY-HTML 1 2020-05-16 2.4.0
#
# ## NAME
#
# dasht-query-html - searches [Dash] docsets and emits HTML table rows
#
# ## SYNOPSIS
#
# `dasht-query-html` [*PATTERN*] [*DOCSET*]...
#
# ### Examples
#
# `dasht-query-html`
#   Topics (A-Z) from each installed docset.
#
# `dasht-query-html` 'c - x'
#   Search for "c - x" in all installed docsets.
#
# `dasht-query-html` 'c - x' bash
#   Search for "c - x" only in the "bash" docset.
#
# `dasht-query-html` 'c - x' bash css
#   Search for "c - x" only in the "bash" and "css" docsets.
#
# ## DESCRIPTION
#
# Searches for *PATTERN* in all installed [Dash] docsets, optionally searching
# only in those whose names match *DOCSET*s, by calling dasht-query-line(1).
# The results are then printed, one per line, to stdout as HTML table rows.
# However, if no results were found, this program exits with a nonzero status.
#
# ### Searching
#
# Whitespace characters in *PATTERN* are treated as wildcards, whereas the
# SQL LIKE wildcard characters `%` and `_` are not: they are taken literally.
#
# Before searching, *PATTERN* is surrounded by whitespace wildcards so that it
# can match anywhere: beginning, middle, or end.  As a result, if *PATTERN* is
# undefined, it becomes a whitespace wildcard and thereby matches everything.
#
# ## ENVIRONMENT
#
# `DASHT_DOCSETS_DIR`
#   Defines the filesystem location where your [Dash] docsets are installed.
#   If undefined, its value is assumed to be `$XDG_DATA_HOME/dasht/docsets/`
#   or, if `XDG_DATA_HOME` is undefined, `$HOME/.local/share/dasht/docsets/`.
#
# ## EXIT STATUS
#
# 44
#   No results were found.
#
# ## SEE ALSO
#
# dasht-query-line(1), dasht-docsets(1), dasht(1), [Dash]
#
# [Dash]: https://kapeli.com/dash
#
# ## AUTHOR
#
# Written in 2016 by Suraj N. Kurapati
# Distributed under the terms of the ISC license (refer to README file).

# Keep errexit on even when the script is started as `sh dasht-query-html`,
# in which case the options on the shebang line are not applied.
set -e

# dasht-query-line(1) runs inside a pipeline subshell, so its failure cannot
# end this script directly.  Instead the subshell signals the main shell ($$),
# whose USR1 trap turns that into the documented exit status.
trap 'exit 44' USR1 # exit with a nonzero status when no results found

# Every argument is forwarded to dasht-query-line(1); the first one (PATTERN)
# is additionally handed to awk so that matches can be highlighted.  The awk
# program is wrapped in shell single quotes, so it must never contain a
# literal apostrophe, not even inside its comments.
{ dasht-query-line "$@" || kill -s USR1 $$ ;} | awk -v pattern="$1" '
  # Escapes XML "predefined entities" in given parameter.
  # See http://www.w3.org/TR/REC-xml/#sec-predefined-ent
  # The ampersand goes first so that the entities inserted by the later
  # substitutions are not escaped a second time.  In each replacement the
  # leading backslash keeps awk from expanding "&" to the matched text.
  function escape(xml) {
    gsub("&", "\\&amp;", xml)
    gsub("\"", "\\&quot;", xml)
    gsub("\047", "\\&#39;", xml) # https://www.w3.org/TR/xhtml1/#C_16
    gsub("<", "\\&lt;", xml)
    gsub(">", "\\&gt;", xml)
    return xml
  }

  # Inserts the given breaker at the end of capitalized words, runs of
  # lowercase text, numbers, and punctuation marks in the given string.
  # Characters in the extras string are considered to be part of words.
  function wordbreak(string, breaker, extras) {
    gsub("[" extras "[:upper:]]+[" extras "[:lower:]]*|"\
         "[" extras "[:lower:]]+|"\
         "[" extras "[:digit:]]+|"\
         "[" extras "[:punct:]]+", "&" breaker, string)
    return string
  }

  # Try to fetch the answer from a cache to avoid repeating the same work.
  # The cache is keyed by the input string only, so every caller of this
  # function is expected to pass the same breaker and extras.
  function wordbreak_cached(key, breaker, extras) {
    if (!(key in wordbreak_cache)) {
      wordbreak_cache[key] = wordbreak(key, breaker, extras)
    }
    return wordbreak_cache[key]
  }

  # Transforms alphabetical characters into bracketed regular expressions
  # that match either lowercase or uppercase versions of those characters.
  # This basically emulates the IGNORECASE feature in a POSIX environment.
  # The names after the wide gap in the parameter list are local variables.
  function ignorecase(regex,    buf, tmp, pos, chr) {
    buf = ""
    tmp = regex
    while (pos = match(tmp, "[[:alpha:]]")) {
      chr = substr(tmp, pos, 1)
      buf = buf substr(tmp, 1, pos - 1) "[" tolower(chr) toupper(chr) "]"
      tmp = substr(tmp, pos + 1)
    }
    return buf tmp
  }

  BEGIN {
    gsub("[\\^.[$()|*+?{]", "\\\\&", pattern) # escape normal regex(7) syntax
    sub("^[[:space:]]+", "", pattern)         # strip leading whitespace
    sub("[[:space:]]+$", "", pattern)         # strip trailing whitespace
    gsub("[[:space:]]+", ".*", pattern)       # treat whitespace as wildcards
    pattern = ignorecase(pattern)             # emulate IGNORECASE=1 for POSIX
    if (pattern == "") pattern = "^."         # grouped by leading character
    FS = "\t"                                 # fields are separated by tabs
  }

  # The table is opened lazily so that nothing is printed without results.
  NR == 1 { print "<table>" }

  # Each input record is expected to hold four tab-separated fields, which
  # are emitted as one table row.
  # NOTE(review): the extraction that damaged this file removed the markup
  # from the string literals below.  The table, row, cell, word-break and
  # refresh tags follow from the surrounding comments; the highlight tags
  # and cell attributes should be confirmed against the upstream release.
  {
    name = $1
    from = wordbreak_cached($2, "<wbr>")
    type = $3
    url = $4

    # mark search terms with STX and ETX bytes which are ignored by escape()
    if (pattern) {
      gsub(pattern, "\002&\003", name)
    }

    # mark word-wrappable points with VT bytes which are ignored by escape()
    name = wordbreak(name, "\v", "\002\003")

    # escape XML entities in search result to make them visible in browsers
    name = escape(name)

    # insert word-break opportunity tags at points marked by VT bytes
    gsub("\v", "<wbr>", name)

    # highlight search terms in search result using the STX and ETX markers
    if (pattern) {
      gsub("\002", "<b><i><u>", name)
      gsub("\003", "</u></i></b>", name)
    }

    print \
      "<tr>"\
        "<td><a href=\"" url "\">" name "</a></td>"\
        "<td valign=\"bottom\">" from "</td>"\
        "<td valign=\"bottom\"><i>" tolower(type) "</i></td>"\
      "</tr>"
  }

  END {
    if (NR > 0) {
      print "</table>"
      if (NR == 1) {
        # there was only one search result, so automatically visit its url
        print "<meta http-equiv=\"refresh\" content=\"0;url=" url "\">"
      }
    }
  }
'