#!/bin/sh -e
#
# # DASHT-QUERY-HTML 1 2020-05-16 2.4.0
#
# ## NAME
#
# dasht-query-html - searches [Dash] docsets and emits HTML table rows
#
# ## SYNOPSIS
#
# `dasht-query-html` [*PATTERN*] [*DOCSET*]...
#
# ### Examples
#
# `dasht-query-html`
# Topics (A-Z) from each installed docset.
#
# `dasht-query-html` 'c - x'
# Search for "c - x" in all installed docsets.
#
# `dasht-query-html` 'c - x' bash
# Search for "c - x" only in the "bash" docset.
#
# `dasht-query-html` 'c - x' bash css
# Search for "c - x" only in the "bash" and "css" docsets.
#
# ## DESCRIPTION
#
# Searches for *PATTERN* in all installed [Dash] docsets, optionally searching
# only in those whose names match *DOCSET*s, by calling dasht-query-line(1).
# The results are then printed, one per line, to stdout as HTML table rows.
# However, if no results were found, this program exits with a nonzero status.
#
# ### Searching
#
# Whitespace characters in *PATTERN* are treated as wildcards, whereas the
# SQL LIKE wildcard characters `%` and `_` are not: they are taken literally.
#
# Before searching, *PATTERN* is surrounded by whitespace wildcards so that it
# can match anywhere: beginning, middle, or end. As a result, if *PATTERN* is
# undefined, it becomes a whitespace wildcard and thereby matches everything.
#
# ## ENVIRONMENT
#
# `DASHT_DOCSETS_DIR`
# Defines the filesystem location where your [Dash] docsets are installed.
# If undefined, its value is assumed to be `$XDG_DATA_HOME/dasht/docsets/`
# or, if `XDG_DATA_HOME` is undefined, `$HOME/.local/share/dasht/docsets/`.
#
# ## EXIT STATUS
#
# 44
# No results were found.
#
# ## SEE ALSO
#
# dasht-query-line(1), dasht-docsets(1), dasht(1), [Dash]
#
# [Dash]: https://kapeli.com/dash
#
# ## AUTHOR
#
# Written in 2016 by Suraj N. Kurapati
# Distributed under the terms of the ISC license (refer to README file).
# Run the search and pipe its results into the awk program that follows.
# Because dasht-query-line runs on the left side of a pipeline, its exit
# status cannot stop the script directly.  Instead, when it exits nonzero,
# the left side sends USR1 to this script ($$ still expands to the PID of
# the main shell inside the pipeline) and the trap below converts that
# signal into exit status 44.
trap 'exit 44' USR1 # exit with a nonzero status when no results found
# The first argument (the search pattern) is also handed to awk as the
# variable "pattern"; all arguments are forwarded to dasht-query-line.
{ dasht-query-line "$@" || kill -s USR1 $$ ;} | awk -v pattern="$1" '
# Escapes the five XML "predefined entities" in the given parameter so that
# the text can be embedded in HTML content and in quoted attribute values.
# See http://www.w3.org/TR/REC-xml/#sec-predefined-ent
#
# xml - the string to escape
#
# Returns a copy of xml in which every ampersand, double quote, apostrophe,
# less-than sign, and greater-than sign is replaced by an entity reference.
function escape(xml) {
  # Ampersands go first so that the entity references inserted by the later
  # substitutions are not escaped a second time.  In gsub() replacement text
  # a bare "&" means "the matched text", hence the "\\&" to get a literal one.
  gsub("&", "\\&amp;", xml)
  gsub("\"", "\\&quot;", xml)
  # The apostrophe is written as octal \047 because this program lives inside
  # a single-quoted shell string; the numeric reference is used instead of
  # the named apos entity per https://www.w3.org/TR/xhtml1/#C_16
  gsub("\047", "\\&#39;", xml)
  gsub("<", "\\&lt;", xml)
  gsub(">", "\\&gt;", xml)
  return xml
}
# Appends the given breaker after each capitalized word, each run of
# lowercase letters, each run of digits, and each run of punctuation marks
# that occurs in the given string, and returns the result.
#
# string  - the text to annotate
# breaker - the text appended after every matched run
# extras  - characters that count as members of every class above, so that
#           they never split a run (may be omitted)
function wordbreak(string, breaker, extras) {
  # every %s below receives the same extras characters
  gsub(sprintf("[%s[:upper:]]+[%s[:lower:]]*|"\
               "[%s[:lower:]]+|"\
               "[%s[:digit:]]+|"\
               "[%s[:punct:]]+",
               extras, extras, extras, extras, extras),
       "&" breaker, string)
  return string
}
# Memoizing front end for wordbreak(): returns the same value as
# wordbreak(key, breaker, extras) but computes it only the first time a
# given combination of arguments is requested.
#
# key     - the text to annotate
# breaker - the text appended after every matched run
# extras  - characters treated as part of words (may be omitted)
function wordbreak_cached(key, breaker, extras) {
  # The cache is indexed by all three arguments (joined by SUBSEP) so that a
  # key requested again with a different breaker or extras is not served a
  # stale answer.  Membership is tested with "in" so that an empty result
  # still counts as cached and no scratch global variable is needed.
  if (!((key, breaker, extras) in wordbreak_cache)) {
    wordbreak_cache[key, breaker, extras] = wordbreak(key, breaker, extras)
  }
  return wordbreak_cache[key, breaker, extras]
}
# Transforms alphabetical characters into bracketed regular expressions
# that match either lowercase or uppercase versions of those characters.
# This basically emulates the IGNORECASE feature in a POSIX environment.
#
# regex - the regular expression to transform
#
# Returns the transformed regular expression; all non-alphabetical
# characters are copied through unchanged.
#
# The names after the wide gap in the parameter list are local scratch
# variables (the awk convention for locals), not arguments: callers pass
# only regex.  Declaring them here keeps them out of the global namespace.
function ignorecase(regex,    buf, tmp, pos, chr) {
  buf = ""    # transformed text accumulated so far
  tmp = regex # remainder that has not been scanned yet
  while (pos = match(tmp, "[[:alpha:]]")) {
    chr = substr(tmp, pos, 1)
    # copy everything before the letter, then the letter as a [xX] bracket
    buf = buf substr(tmp, 1, pos - 1) "[" tolower(chr) toupper(chr) "]"
    tmp = substr(tmp, pos + 1)
  }
  # whatever remains contains no letters and is appended as is
  return buf tmp
}
# Prepare the field separator and turn the user-supplied pattern into a
# case-insensitive regular expression before any input is read.
BEGIN {
  # input records consist of tab-separated fields
  FS = "\t"

  # neutralize regex(7) metacharacters so they are matched literally
  gsub("[\\^.[$()|*+?{]", "\\\\&", pattern)

  # trim whitespace at both ends, then let inner whitespace match anything
  sub("[[:space:]]+$", "", pattern)
  sub("^[[:space:]]+", "", pattern)
  gsub("[[:space:]]+", ".*", pattern)

  # POSIX awk has no IGNORECASE, so expand each letter into a [xX] bracket
  pattern = ignorecase(pattern)

  # an empty pattern falls back to matching only the leading character
  if (length(pattern) == 0) {
    pattern = "^."
  }
}
# Open the HTML table just before the first result row is printed.
NR == 1 {
  print "<table>"
}
# Print one HTML table row for every input record.
# Fields used: $1 is the result name shown as the link text, $2 is passed
# through wordbreak_cached() and shown in the second column (presumably the
# docset that the result came from), $3 is shown lowercased in the third
# column, and $4 is the link target.
{
  name = $1
  from = wordbreak_cached($2, "<wbr>")
  type = $3
  # the link target is embedded in quoted attribute values, so escape it
  url = escape($4)
  # mark search terms with STX and ETX bytes which are ignored by escape()
  if (pattern) {
    gsub(pattern, "\002&\003", name)
  }
  # mark word-wrappable points with VT bytes which are ignored by escape()
  name = wordbreak(name, "\v", "\002\003")
  # escape XML entities in search result to make them visible in browsers
  name = escape(name)
  # insert word-break opportunity tags at points marked by VT bytes
  gsub("\v", "<wbr>", name)
  # highlight search terms in search result using the STX and ETX markers
  if (pattern) {
    gsub("\002", "<b><i><u>", name)
    gsub("\003", "</u></i></b>", name)
  }
  print \
    "<tr>"\
      "<td><a href=\"" url "\">" name "</a></td>"\
      "<td valign=\"bottom\">" from "</td>"\
      "<td valign=\"bottom\">" tolower(type) "</td>"\
    "</tr>"
}
# Close the table, but only if at least one row was printed.
END {
  if (NR > 0) {
    print "</table>"
    if (NR == 1) {
      # there was only one search result, so automatically visit its url
      # (url still holds the escaped link target of that single record)
      print "<meta http-equiv=\"refresh\" content=\"0;url=" url "\">"
    }
  }
}
'