b0ca50fb4d
The previous commit made apropos robust against database rows that are missing values for mandatory fields such as name or section: instead of crashing, it still produces output. Normally we don't expect those values to be missing from the database, but it can happen after parsing errors. The machine architecture, however, is an optional field, since not all man pages are hardware specific, so it should be allowed to stay NULL when it is not present in the database.
1166 lines
30 KiB
C
1166 lines
30 KiB
C
/* $NetBSD: apropos-utils.c,v 1.47 2019/08/18 09:14:30 abhinav Exp $ */
|
|
/*-
|
|
* Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
|
|
* All rights reserved.
|
|
*
|
|
* This code was developed as part of Google's Summer of Code 2011 program.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__RCSID("$NetBSD: apropos-utils.c,v 1.47 2019/08/18 09:14:30 abhinav Exp $");
|
|
|
|
#include <sys/queue.h>
|
|
#include <sys/stat.h>
|
|
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <err.h>
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <util.h>
|
|
#include <zlib.h>
|
|
#include <term.h>
|
|
#include <unistd.h>
|
|
#undef tab // XXX: manconf.h
|
|
|
|
#include "apropos-utils.h"
|
|
#include "custom_apropos_tokenizer.h"
|
|
#include "manconf.h"
|
|
#include "fts3_tokenizer.h"
|
|
|
|
typedef struct orig_callback_data {
|
|
void *data;
|
|
int (*callback) (query_callback_args*);
|
|
} orig_callback_data;
|
|
|
|
/*
 * State shared with rank_func() through the sqlite user-data pointer:
 * value accumulates the weighted idf, status is set to non-zero once the
 * value has been computed so that it is not accumulated again.
 */
typedef struct inverse_document_frequency {
	double value;
	int status;
} inverse_document_frequency;
|
|
|
|
/*
 * Ranking weights for the individual columns of the mandb table.
 * rank_func() starts at column 1 and looks up col_weights[column - 1].
 */
static const double col_weights[] = {
	[0]  = 2.0,	/* NAME */
	[1]  = 2.00,	/* Name-description */
	[2]  = 0.55,	/* DESCRIPTION */
	[3]  = 0.10,	/* LIBRARY */
	[4]  = 0.001,	/* RETURN VALUES */
	[5]  = 0.20,	/* ENVIRONMENT */
	[6]  = 0.01,	/* FILES */
	[7]  = 0.001,	/* EXIT STATUS */
	[8]  = 2.00,	/* DIAGNOSTICS */
	[9]  = 0.05,	/* ERRORS */
	[10] = 0.00,	/* md5_hash */
	[11] = 1.00	/* machine */
};
|
|
|
|
#ifndef APROPOS_DEBUG
|
|
static int
|
|
register_tokenizer(sqlite3 *db)
|
|
{
|
|
int rc;
|
|
sqlite3_stmt *stmt;
|
|
const sqlite3_tokenizer_module *p;
|
|
const char *name = "custom_apropos_tokenizer";
|
|
get_custom_apropos_tokenizer(&p);
|
|
const char *sql = "SELECT fts3_tokenizer(?, ?)";
|
|
|
|
sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0);
|
|
rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
|
|
if (rc != SQLITE_OK)
|
|
return rc;
|
|
|
|
sqlite3_bind_text(stmt, 1, name, -1, SQLITE_STATIC);
|
|
sqlite3_bind_blob(stmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
|
sqlite3_step(stmt);
|
|
|
|
return sqlite3_finalize(stmt);
|
|
}
|
|
#endif
|
|
|
|
/*
 * lower --
 *  Converts the string str to lower case in place and returns str.
 */
char *
lower(char *str)
{
	char *p;

	assert(str);
	for (p = str; *p != '\0'; p++)
		*p = tolower((unsigned char)*p);
	return str;
}
|
|
|
|
/*
 * concat --
 *  Utility function. Appends a space character and the NUL terminated
 *  string src to *dst, i.e. *dst = *dst + " " + src.
 *  The work is done by concat2(); this wrapper only measures src.
 */
void
concat(char **dst, const char *src)
{
	const size_t srclen = strlen(src);

	concat2(dst, src, srclen);
}
|
|
|
|
/*
 * concat2 --
 *  Appends a space and the first srclen bytes of src to the string *dst.
 *  When *dst is NULL, *dst simply becomes a copy of those srclen bytes.
 *  *dst is reallocated as needed and is always NUL terminated on return.
 */
void
concat2(char **dst, const char *src, size_t srclen)
{
	char *buf = *dst;
	size_t oldlen;

	assert(src != NULL);

	/* Nothing accumulated so far: the result is just a copy of src */
	if (buf == NULL) {
		*dst = estrndup(src, srclen);
		return;
	}

	oldlen = strlen(buf);

	/* Old text + separator space + srclen bytes of src + NUL byte */
	buf = erealloc(buf, oldlen + srclen + 2);

	buf[oldlen] = ' ';
	memcpy(buf + oldlen + 1, src, srclen);
	buf[oldlen + 1 + srclen] = '\0';

	*dst = buf;
}
|
|
|
|
void
|
|
close_db(sqlite3 *db)
|
|
{
|
|
sqlite3_close(db);
|
|
sqlite3_shutdown();
|
|
}
|
|
|
|
/*
|
|
* create_db --
|
|
* Creates the database schema.
|
|
*/
|
|
static int
|
|
create_db(sqlite3 *db)
|
|
{
|
|
const char *sqlstr = NULL;
|
|
char *schemasql;
|
|
char *errmsg = NULL;
|
|
|
|
/*------------------------ Create the tables------------------------------*/
|
|
|
|
#if NOTYET
|
|
sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL);
|
|
#else
|
|
sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL);
|
|
#endif
|
|
|
|
schemasql = sqlite3_mprintf("PRAGMA user_version = %d",
|
|
APROPOS_SCHEMA_VERSION);
|
|
sqlite3_exec(db, schemasql, NULL, NULL, &errmsg);
|
|
if (errmsg != NULL)
|
|
goto out;
|
|
sqlite3_free(schemasql);
|
|
|
|
sqlstr =
|
|
//mandb
|
|
"CREATE VIRTUAL TABLE mandb USING fts4(section, name, "
|
|
"name_desc, desc, lib, return_vals, env, files, "
|
|
"exit_status, diagnostics, errors, md5_hash UNIQUE, machine, "
|
|
#ifndef APROPOS_DEBUG
|
|
"compress=zip, uncompress=unzip, tokenize=custom_apropos_tokenizer, "
|
|
#else
|
|
"tokenize=porter, "
|
|
#endif
|
|
"notindexed=section, notindexed=md5_hash); "
|
|
//mandb_meta
|
|
"CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, "
|
|
"file UNIQUE, md5_hash UNIQUE, id INTEGER PRIMARY KEY); "
|
|
//mandb_links
|
|
"CREATE TABLE IF NOT EXISTS mandb_links(link COLLATE NOCASE, target, section, "
|
|
"machine, md5_hash, name_desc); ";
|
|
|
|
sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
|
|
if (errmsg != NULL)
|
|
goto out;
|
|
|
|
sqlstr =
|
|
"CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links "
|
|
"(link); "
|
|
"CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta "
|
|
"(device, inode); "
|
|
"CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links "
|
|
"(md5_hash);";
|
|
sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
|
|
if (errmsg != NULL)
|
|
goto out;
|
|
return 0;
|
|
|
|
out:
|
|
warnx("%s", errmsg);
|
|
free(errmsg);
|
|
sqlite3_close(db);
|
|
sqlite3_shutdown();
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* zip --
|
|
* User defined Sqlite function to compress the FTS table
|
|
*/
|
|
static void
|
|
zip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
|
|
{
|
|
int nin;
|
|
long int nout;
|
|
const unsigned char * inbuf;
|
|
unsigned char *outbuf;
|
|
|
|
assert(nval == 1);
|
|
nin = sqlite3_value_bytes(apval[0]);
|
|
inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]);
|
|
nout = nin + 13 + (nin + 999) / 1000;
|
|
outbuf = emalloc(nout);
|
|
compress(outbuf, (unsigned long *) &nout, inbuf, nin);
|
|
sqlite3_result_blob(pctx, outbuf, nout, free);
|
|
}
|
|
|
|
/*
|
|
* unzip --
|
|
* User defined Sqlite function to uncompress the FTS table.
|
|
*/
|
|
static void
|
|
unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
|
|
{
|
|
unsigned int rc;
|
|
unsigned char *outbuf;
|
|
z_stream stream;
|
|
long total_out;
|
|
|
|
assert(nval == 1);
|
|
memset(&stream, 0, sizeof(stream));
|
|
stream.next_in = __UNCONST(sqlite3_value_blob(apval[0]));
|
|
stream.avail_in = sqlite3_value_bytes(apval[0]);
|
|
stream.zalloc = NULL;
|
|
stream.zfree = NULL;
|
|
|
|
if (inflateInit(&stream) != Z_OK) {
|
|
return;
|
|
}
|
|
|
|
total_out = stream.avail_out = stream.avail_in * 2 + 100;
|
|
stream.next_out = outbuf = emalloc(stream.avail_out);
|
|
while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) {
|
|
if (rc != Z_OK ||
|
|
(stream.avail_out != 0 && stream.avail_in == 0)) {
|
|
free(outbuf);
|
|
return;
|
|
}
|
|
total_out <<= 1;
|
|
outbuf = erealloc(outbuf, total_out);
|
|
stream.next_out = outbuf + stream.total_out;
|
|
stream.avail_out = total_out - stream.total_out;
|
|
}
|
|
if (inflateEnd(&stream) != Z_OK) {
|
|
free(outbuf);
|
|
return;
|
|
}
|
|
if (stream.total_out == 0) {
|
|
free(outbuf);
|
|
return;
|
|
}
|
|
outbuf = erealloc(outbuf, stream.total_out);
|
|
sqlite3_result_text(pctx, (const char *)outbuf, stream.total_out, free);
|
|
}
|
|
|
|
/*
|
|
* get_dbpath --
|
|
* Read the path of the database from man.conf and return.
|
|
*/
|
|
char *
|
|
get_dbpath(const char *manconf)
|
|
{
|
|
TAG *tp;
|
|
char *dbpath;
|
|
|
|
config(manconf);
|
|
tp = gettag("_mandb", 1);
|
|
if (!tp)
|
|
return NULL;
|
|
|
|
if (TAILQ_EMPTY(&tp->entrylist))
|
|
return NULL;
|
|
|
|
dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s;
|
|
return dbpath;
|
|
}
|
|
|
|
/* init_db --
|
|
* Prepare the database. Register the compress/uncompress functions and the
|
|
* stopword tokenizer.
|
|
* db_flag specifies the mode in which to open the database. 3 options are
|
|
* available:
|
|
* 1. DB_READONLY: Open in READONLY mode. An error if db does not exist.
|
|
* 2. DB_READWRITE: Open in read-write mode. An error if db does not exist.
|
|
* 3. DB_CREATE: Open in read-write mode. It will try to create the db if
|
|
* it does not exist already.
|
|
* RETURN VALUES:
|
|
* The function will return NULL in case the db does not exist
|
|
* and DB_CREATE
|
|
* was not specified. And in case DB_CREATE was specified and yet NULL is
|
|
* returned, then there was some other error.
|
|
* In normal cases the function should return a handle to the db.
|
|
*/
|
|
sqlite3 *
|
|
init_db(mandb_access_mode db_flag, const char *manconf)
|
|
{
|
|
sqlite3 *db = NULL;
|
|
sqlite3_stmt *stmt;
|
|
struct stat sb;
|
|
int rc;
|
|
int create_db_flag = 0;
|
|
|
|
char *dbpath = get_dbpath(manconf);
|
|
if (dbpath == NULL)
|
|
errx(EXIT_FAILURE, "_mandb entry not found in man.conf");
|
|
|
|
if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) {
|
|
/* Database does not exist, check if DB_CREATE was specified,
|
|
* and set flag to create the database schema
|
|
*/
|
|
if (db_flag != (MANDB_CREATE)) {
|
|
warnx("Missing apropos database. "
|
|
"Please run makemandb to create it.");
|
|
return NULL;
|
|
}
|
|
create_db_flag = 1;
|
|
} else {
|
|
/*
|
|
* Database exists. Check if we have the permissions
|
|
* to read/write the files
|
|
*/
|
|
int access_mode = R_OK;
|
|
switch (db_flag) {
|
|
case MANDB_CREATE:
|
|
case MANDB_WRITE:
|
|
access_mode |= W_OK;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if ((access(dbpath, access_mode)) != 0) {
|
|
warnx("Unable to access the database, please check"
|
|
" permissions for `%s'", dbpath);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
sqlite3_initialize();
|
|
rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL);
|
|
|
|
if (rc != SQLITE_OK) {
|
|
warnx("%s", sqlite3_errmsg(db));
|
|
goto error;
|
|
}
|
|
|
|
sqlite3_extended_result_codes(db, 1);
|
|
|
|
#ifndef APROPOS_DEBUG
|
|
rc = register_tokenizer(db);
|
|
if (rc != SQLITE_OK) {
|
|
warnx("Unable to register custom tokenizer: %s", sqlite3_errmsg(db));
|
|
goto error;
|
|
}
|
|
#endif
|
|
|
|
if (create_db_flag && create_db(db) < 0) {
|
|
warnx("%s", "Unable to create database schema");
|
|
goto error;
|
|
}
|
|
|
|
rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL);
|
|
if (rc != SQLITE_OK) {
|
|
warnx("Unable to query schema version: %s",
|
|
sqlite3_errmsg(db));
|
|
goto error;
|
|
}
|
|
if (sqlite3_step(stmt) != SQLITE_ROW) {
|
|
sqlite3_finalize(stmt);
|
|
warnx("Unable to query schema version: %s",
|
|
sqlite3_errmsg(db));
|
|
goto error;
|
|
}
|
|
if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) {
|
|
sqlite3_finalize(stmt);
|
|
warnx("Incorrect schema version found. "
|
|
"Please run makemandb -f.");
|
|
goto error;
|
|
}
|
|
sqlite3_finalize(stmt);
|
|
|
|
|
|
/* Register the zip and unzip functions for FTS compression */
|
|
rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip,
|
|
NULL, NULL);
|
|
if (rc != SQLITE_OK) {
|
|
warnx("Unable to register function: compress: %s",
|
|
sqlite3_errmsg(db));
|
|
goto error;
|
|
}
|
|
|
|
rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL,
|
|
unzip, NULL, NULL);
|
|
if (rc != SQLITE_OK) {
|
|
warnx("Unable to register function: uncompress: %s",
|
|
sqlite3_errmsg(db));
|
|
goto error;
|
|
}
|
|
return db;
|
|
|
|
error:
|
|
close_db(db);
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* rank_func --
|
|
* Sqlite user defined function for ranking the documents.
|
|
* For each phrase of the query, it computes the tf and idf and adds them over.
|
|
* It computes the final rank, by multiplying tf and idf together.
|
|
* Weight of term t for document d = (term frequency of t in d *
|
|
* inverse document frequency of t)
|
|
*
|
|
* Term Frequency of term t in document d = Number of times t occurs in d /
|
|
* Number of times t appears in all documents
|
|
*
|
|
* Inverse document frequency of t = log(Total number of documents /
|
|
* Number of documents in which t occurs)
|
|
*/
|
|
static void
|
|
rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval)
|
|
{
|
|
inverse_document_frequency *idf = sqlite3_user_data(pctx);
|
|
double tf = 0.0;
|
|
const unsigned int *matchinfo;
|
|
int ncol;
|
|
int nphrase;
|
|
int iphrase;
|
|
int ndoc;
|
|
int doclen = 0;
|
|
const double k = 3.75;
|
|
/*
|
|
* Check that the number of arguments passed to this
|
|
* function is correct.
|
|
*/
|
|
assert(nval == 1);
|
|
|
|
matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]);
|
|
nphrase = matchinfo[0];
|
|
ncol = matchinfo[1];
|
|
ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol];
|
|
for (iphrase = 0; iphrase < nphrase; iphrase++) {
|
|
int icol;
|
|
const unsigned int *phraseinfo =
|
|
&matchinfo[2 + ncol + iphrase * ncol * 3];
|
|
for(icol = 1; icol < ncol; icol++) {
|
|
|
|
/* nhitcount: number of times the current phrase occurs
|
|
* in the current column in the current document.
|
|
* nglobalhitcount: number of times current phrase
|
|
* occurs in the current column in all documents.
|
|
* ndocshitcount: number of documents in which the
|
|
* current phrase occurs in the current column at
|
|
* least once.
|
|
*/
|
|
int nhitcount = phraseinfo[3 * icol];
|
|
int nglobalhitcount = phraseinfo[3 * icol + 1];
|
|
int ndocshitcount = phraseinfo[3 * icol + 2];
|
|
doclen = matchinfo[2 + icol ];
|
|
double weight = col_weights[icol - 1];
|
|
if (idf->status == 0 && ndocshitcount)
|
|
idf->value +=
|
|
log(((double)ndoc / ndocshitcount))* weight;
|
|
|
|
/*
|
|
* Dividing the tf by document length to normalize
|
|
* the effect of longer documents.
|
|
*/
|
|
if (nglobalhitcount > 0 && nhitcount)
|
|
tf += (((double)nhitcount * weight)
|
|
/ (nglobalhitcount * doclen));
|
|
}
|
|
}
|
|
idf->status = 1;
|
|
|
|
/*
|
|
* Final score: Dividing by k + tf further normalizes the weight
|
|
* leading to better results. The value of k is experimental
|
|
*/
|
|
double score = (tf * idf->value) / (k + tf);
|
|
sqlite3_result_double(pctx, score);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* generates sql query for matching the user entered query
|
|
*/
|
|
static char *
|
|
generate_search_query(query_args *args, const char *snippet_args[3])
|
|
{
|
|
const char *default_snippet_args[3];
|
|
char *section_clause = NULL;
|
|
char *limit_clause = NULL;
|
|
char *machine_clause = NULL;
|
|
char *query = NULL;
|
|
|
|
if (args->machine) {
|
|
machine_clause = sqlite3_mprintf("AND mandb.machine=%Q", args->machine);
|
|
if (machine_clause == NULL)
|
|
goto RETURN;
|
|
}
|
|
|
|
if (args->nrec >= 0) {
|
|
/* Use the provided number of records and offset */
|
|
limit_clause = sqlite3_mprintf(" LIMIT %d OFFSET %d",
|
|
args->nrec, args->offset);
|
|
if (limit_clause == NULL)
|
|
goto RETURN;
|
|
}
|
|
|
|
/* We want to build a query of the form: "select x,y,z from mandb where
|
|
* mandb match :query [AND (section IN ('1', '2')]
|
|
* ORDER BY rank DESC [LIMIT 10 OFFSET 0]"
|
|
* NOTES:
|
|
* 1. The portion in first pair of square brackets is optional.
|
|
* It will be there only if the user has specified an option
|
|
* to search in one or more specific sections.
|
|
* 2. The LIMIT portion will be there if the user has specified
|
|
* a limit using the -n option.
|
|
*/
|
|
if (args->sections && args->sections[0]) {
|
|
concat(§ion_clause, " AND mandb.section IN (");
|
|
for (size_t i = 0; args->sections[i]; i++) {
|
|
char *temp;
|
|
char c = args->sections[i + 1]? ',': ')';
|
|
if ((temp = sqlite3_mprintf("%Q%c", args->sections[i], c)) == NULL)
|
|
goto RETURN;
|
|
concat(§ion_clause, temp);
|
|
sqlite3_free(temp);
|
|
}
|
|
}
|
|
|
|
if (snippet_args == NULL) {
|
|
default_snippet_args[0] = "";
|
|
default_snippet_args[1] = "";
|
|
default_snippet_args[2] = "...";
|
|
snippet_args = default_snippet_args;
|
|
}
|
|
|
|
if (args->legacy) {
|
|
char *wild;
|
|
easprintf(&wild, "%%%s%%", args->search_str);
|
|
query = sqlite3_mprintf("SELECT section, name, name_desc, machine"
|
|
" FROM mandb"
|
|
" WHERE name LIKE %Q OR name_desc LIKE %Q "
|
|
"%s"
|
|
"%s",
|
|
wild, wild,
|
|
section_clause ? section_clause : "",
|
|
limit_clause ? limit_clause : "");
|
|
free(wild);
|
|
} else if (strchr(args->search_str, ' ') == NULL) {
|
|
/*
|
|
* If it's a single word query, we want to search in the
|
|
* links table as well. If the link table contains an entry
|
|
* for the queried keyword, we want to use that as the name of
|
|
* the man page.
|
|
* For example, for `apropos realloc` the output should be
|
|
* realloc(3) and not malloc(3).
|
|
*/
|
|
query = sqlite3_mprintf(
|
|
"SELECT section, name, name_desc, machine,"
|
|
" snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
|
|
" rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
|
|
" FROM mandb WHERE name NOT IN ("
|
|
" SELECT target FROM mandb_links WHERE link=%Q AND"
|
|
" mandb_links.section=mandb.section) AND mandb MATCH %Q %s %s"
|
|
" UNION"
|
|
" SELECT mandb.section, mandb_links.link AS name, mandb.name_desc,"
|
|
" mandb.machine, '' AS snippet, 100.00 AS rank"
|
|
" FROM mandb JOIN mandb_links ON mandb.name=mandb_links.target and"
|
|
" mandb.section=mandb_links.section WHERE mandb_links.link=%Q"
|
|
" %s %s"
|
|
" ORDER BY rank DESC %s",
|
|
snippet_args[0], snippet_args[1], snippet_args[2],
|
|
args->search_str, args->search_str, section_clause ? section_clause : "",
|
|
machine_clause ? machine_clause : "", args->search_str,
|
|
machine_clause ? machine_clause : "",
|
|
section_clause ? section_clause : "",
|
|
limit_clause ? limit_clause : "");
|
|
} else {
|
|
query = sqlite3_mprintf("SELECT section, name, name_desc, machine,"
|
|
" snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
|
|
" rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
|
|
" FROM mandb"
|
|
" WHERE mandb MATCH %Q %s "
|
|
"%s"
|
|
" ORDER BY rank DESC"
|
|
"%s",
|
|
snippet_args[0], snippet_args[1], snippet_args[2],
|
|
args->search_str, machine_clause ? machine_clause : "",
|
|
section_clause ? section_clause : "",
|
|
limit_clause ? limit_clause : "");
|
|
}
|
|
|
|
RETURN:
|
|
sqlite3_free(machine_clause);
|
|
free(section_clause);
|
|
sqlite3_free(limit_clause);
|
|
return query;
|
|
}
|
|
|
|
static const char *
|
|
get_stmt_col_text(sqlite3_stmt *stmt, int col)
|
|
{
|
|
const char *t = (const char *) sqlite3_column_text(stmt, col);
|
|
return t == NULL ? "*?*" : t;
|
|
}
|
|
|
|
/*
|
|
* Execute the full text search query and return the number of results
|
|
* obtained.
|
|
*/
|
|
static unsigned int
|
|
execute_search_query(sqlite3 *db, char *query, query_args *args)
|
|
{
|
|
sqlite3_stmt *stmt;
|
|
char *name;
|
|
char *slash_ptr;
|
|
const char *name_temp;
|
|
char *m = NULL;
|
|
int rc;
|
|
query_callback_args callback_args;
|
|
inverse_document_frequency idf = {0, 0};
|
|
|
|
if (!args->legacy) {
|
|
/* Register the rank function */
|
|
rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY,
|
|
(void *) &idf, rank_func, NULL, NULL);
|
|
if (rc != SQLITE_OK) {
|
|
warnx("Unable to register the ranking function: %s",
|
|
sqlite3_errmsg(db));
|
|
sqlite3_close(db);
|
|
sqlite3_shutdown();
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
|
|
if (rc == SQLITE_IOERR) {
|
|
warnx("Corrupt database. Please rerun makemandb");
|
|
return -1;
|
|
} else if (rc != SQLITE_OK) {
|
|
warnx("%s", sqlite3_errmsg(db));
|
|
return -1;
|
|
}
|
|
|
|
unsigned int nresults = 0;
|
|
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
|
nresults++;
|
|
callback_args.section = get_stmt_col_text(stmt, 0);
|
|
name_temp = get_stmt_col_text(stmt, 1);
|
|
callback_args.name_desc = get_stmt_col_text(stmt, 2);
|
|
callback_args.machine = (const char *) sqlite3_column_text(stmt, 3);
|
|
if (!args->legacy) {
|
|
callback_args.snippet = get_stmt_col_text(stmt, 4);
|
|
callback_args.snippet_length =
|
|
strlen(callback_args.snippet);
|
|
} else {
|
|
callback_args.snippet = "";
|
|
callback_args.snippet_length = 1;
|
|
}
|
|
if ((slash_ptr = strrchr(name_temp, '/')) != NULL)
|
|
name_temp = slash_ptr + 1;
|
|
if (callback_args.machine && callback_args.machine[0]) {
|
|
m = estrdup(callback_args.machine);
|
|
easprintf(&name, "%s/%s", lower(m), name_temp);
|
|
free(m);
|
|
} else {
|
|
name = estrdup(get_stmt_col_text(stmt, 1));
|
|
}
|
|
callback_args.name = name;
|
|
callback_args.other_data = args->callback_data;
|
|
(args->callback)(&callback_args);
|
|
free(name);
|
|
}
|
|
sqlite3_finalize(stmt);
|
|
return nresults;
|
|
}
|
|
|
|
|
|
/*
|
|
* run_query_internal --
|
|
* Performs the searches for the keywords entered by the user.
|
|
* The 2nd param: snippet_args is an array of strings providing values for the
|
|
* last three parameters to the snippet function of sqlite. (Look at the docs).
|
|
* The 3rd param: args contains rest of the search parameters. Look at
|
|
* arpopos-utils.h for the description of individual fields.
|
|
*
|
|
*/
|
|
static int
|
|
run_query_internal(sqlite3 *db, const char *snippet_args[3], query_args *args)
|
|
{
|
|
char *query;
|
|
query = generate_search_query(args, snippet_args);
|
|
if (query == NULL) {
|
|
*args->errmsg = estrdup("malloc failed");
|
|
return -1;
|
|
}
|
|
|
|
execute_search_query(db, query, args);
|
|
sqlite3_free(query);
|
|
return *(args->errmsg) == NULL ? 0 : -1;
|
|
}
|
|
|
|
/*
 * get_escaped_html_string --
 *  Returns a newly allocated copy of src that is safe to embed in HTML:
 *	<	becomes &lt;
 *	>	becomes &gt;
 *	"	becomes &quot;
 *	&	becomes &amp;, unless it is preceded by a backslash
 *		(i.e. it is part of the mdoc escape sequence \&)
 *	\002	becomes <b>
 *	\003	becomes </b>
 *  On return *slen holds the length of the escaped string. The length is
 *  measured from src itself, so the value of *slen on entry is not relied
 *  upon. The caller releases the result with free().
 */
static char *
get_escaped_html_string(const char *src, size_t *slen)
{
	static const char trouble[] = "<>\"&\002\003";
	const char *p;
	char *dst, *ddst;
	size_t count = 0;

	/*
	 * First scan src to find out its length and the number of
	 * characters that need to be replaced. The scan stops exactly at
	 * the terminating NUL and never steps past it.
	 */
	for (p = src; *p != '\0'; p++) {
		if (strchr(trouble, *p) != NULL)
			count++;
	}

#define append(a)				\
    do {					\
	memcpy(dst, (a), sizeof(a) - 1);	\
	dst += sizeof(a) - 1; 			\
    } while (/*CONSTCOND*/0)

	/*
	 * The longest replacement (&quot;) is 6 bytes for 1 byte of input,
	 * i.e. at most 5 extra bytes per special character.
	 */
	ddst = dst = emalloc((size_t)(p - src) + count * 5 + 1);
	for (p = src; *p != '\0'; p++) {
		switch (*p) {
		case '<':
			append("&lt;");
			break;
		case '>':
			append("&gt;");
			break;
		case '\"':
			append("&quot;");
			break;
		case '&':
			/*
			 * Don't perform the quoting if this & is part of
			 * an mdoc escape sequence, e.g. \&
			 * An & at the very start has no predecessor and is
			 * therefore always quoted.
			 */
			if (p == src || p[-1] != '\\')
				append("&amp;");
			else
				append("&");
			break;
		case '\002':
			append("<b>");
			break;
		case '\003':
			append("</b>");
			break;
		default:
			*dst++ = *p;
			break;
		}
	}
#undef append

	*dst = '\0';
	*slen = dst - ddst;
	return ddst;
}
|
|
|
|
|
|
/*
|
|
* callback_html --
|
|
* Callback function for run_query_html. It builds the html output and then
|
|
* calls the actual user supplied callback function.
|
|
*/
|
|
static int
|
|
callback_html(query_callback_args *callback_args)
|
|
{
|
|
struct orig_callback_data *orig_data = callback_args->other_data;
|
|
int (*callback)(query_callback_args*) = orig_data->callback;
|
|
size_t length = callback_args->snippet_length;
|
|
size_t name_description_length = strlen(callback_args->name_desc);
|
|
char *qsnippet = get_escaped_html_string(callback_args->snippet, &length);
|
|
char *qname_description = get_escaped_html_string(callback_args->name_desc,
|
|
&name_description_length);
|
|
callback_args->name_desc = qname_description;
|
|
callback_args->snippet = qsnippet;
|
|
callback_args->snippet_length = length;
|
|
callback_args->other_data = orig_data->data;
|
|
(*callback)(callback_args);
|
|
free(qsnippet);
|
|
free(qname_description);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* run_query_html --
|
|
* Utility function to output query result in HTML format.
|
|
* It internally calls run_query only, but it first passes the output to its
|
|
* own custom callback function, which preprocess the snippet for quoting
|
|
* inline HTML fragments.
|
|
* After that it delegates the call the actual user supplied callback function.
|
|
*/
|
|
static int
|
|
run_query_html(sqlite3 *db, query_args *args)
|
|
{
|
|
struct orig_callback_data orig_data;
|
|
orig_data.callback = args->callback;
|
|
orig_data.data = args->callback_data;
|
|
const char *snippet_args[] = {"\002", "\003", "..."};
|
|
args->callback = &callback_html;
|
|
args->callback_data = (void *) &orig_data;
|
|
return run_query_internal(db, snippet_args, args);
|
|
}
|
|
|
|
/*
 * ul_pager --
 *  Underline a string, pager style: every byte c of s is written as the
 *  three bytes '_', backspace, c. When ul is zero a plain copy of s is
 *  returned instead. The result is allocated; the caller frees it.
 */
static char *
ul_pager(int ul, const char *s)
{
	const char *in;
	char *out, *w;

	if (!ul)
		return estrdup(s);

	// a -> _\ba
	out = emalloc(strlen(s) * 3 + 1);
	w = out;
	for (in = s; *in != '\0'; in++) {
		w[0] = '_';
		w[1] = '\b';
		w[2] = *in;
		w += 3;
	}
	*w = '\0';
	return out;
}
|
|
|
|
/*
|
|
* callback_pager --
|
|
* A callback similar to callback_html. It overstrikes the matching text in
|
|
* the snippet so that it appears emboldened when viewed using a pager like
|
|
* more or less.
|
|
*/
|
|
static int
|
|
callback_pager(query_callback_args *callback_args)
|
|
{
|
|
struct orig_callback_data *orig_data = callback_args->other_data;
|
|
char *psnippet;
|
|
const char *temp = callback_args->snippet;
|
|
int count = 0;
|
|
int i = 0, did;
|
|
size_t sz = 0;
|
|
size_t psnippet_length;
|
|
|
|
/* Count the number of bytes of matching text. For each of these
|
|
* bytes we will use 2 extra bytes to overstrike it so that it
|
|
* appears bold when viewed using a pager.
|
|
*/
|
|
while (*temp) {
|
|
sz = strcspn(temp, "\002\003");
|
|
temp += sz;
|
|
if (*temp == '\003') {
|
|
count += 2 * (sz);
|
|
}
|
|
temp++;
|
|
}
|
|
|
|
psnippet_length = callback_args->snippet_length + count;
|
|
psnippet = emalloc(psnippet_length + 1);
|
|
|
|
/* Copy the bytes from snippet to psnippet:
|
|
* 1. Copy the bytes before \002 as it is.
|
|
* 2. The bytes after \002 need to be overstriked till we
|
|
* encounter \003.
|
|
* 3. To overstrike a byte 'A' we need to write 'A\bA'
|
|
*/
|
|
did = 0;
|
|
const char *snippet = callback_args->snippet;
|
|
while (*snippet) {
|
|
sz = strcspn(snippet, "\002");
|
|
memcpy(&psnippet[i], snippet, sz);
|
|
snippet += sz;
|
|
i += sz;
|
|
|
|
/* Don't change this. Advancing the pointer without reading the byte
|
|
* is causing strange behavior.
|
|
*/
|
|
if (*snippet == '\002')
|
|
snippet++;
|
|
while (*snippet && *snippet != '\003') {
|
|
did = 1;
|
|
psnippet[i++] = *snippet;
|
|
psnippet[i++] = '\b';
|
|
psnippet[i++] = *snippet++;
|
|
}
|
|
if (*snippet)
|
|
snippet++;
|
|
}
|
|
|
|
psnippet[i] = 0;
|
|
char *ul_section = ul_pager(did, callback_args->section);
|
|
char *ul_name = ul_pager(did, callback_args->name);
|
|
char *ul_name_desc = ul_pager(did, callback_args->name_desc);
|
|
callback_args->section = ul_section;
|
|
callback_args->name = ul_name;
|
|
callback_args->name_desc = ul_name_desc;
|
|
callback_args->snippet = psnippet;
|
|
callback_args->snippet_length = psnippet_length;
|
|
callback_args->other_data = orig_data->data;
|
|
(orig_data->callback)(callback_args);
|
|
free(ul_section);
|
|
free(ul_name);
|
|
free(ul_name_desc);
|
|
free(psnippet);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Data handed to callback_term(): the user's original callback/data and
 * the two strings that ul_term() puts before and after a string.
 */
struct term_args {
	struct orig_callback_data *orig_data;	/* user's callback and data */
	const char *smul;			/* written before the text */
	const char *rmul;			/* written after the text */
};
|
|
|
|
/*
|
|
* underline a string, pager style.
|
|
*/
|
|
static char *
|
|
ul_term(const char *s, const struct term_args *ta)
|
|
{
|
|
char *dst;
|
|
|
|
easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul);
|
|
return dst;
|
|
}
|
|
|
|
/*
|
|
* callback_term --
|
|
* A callback similar to callback_html. It overstrikes the matching text in
|
|
* the snippet so that it appears emboldened when viewed using a pager like
|
|
* more or less.
|
|
*/
|
|
static int
|
|
callback_term(query_callback_args *callback_args)
|
|
{
|
|
struct term_args *ta = callback_args->other_data;
|
|
struct orig_callback_data *orig_data = ta->orig_data;
|
|
|
|
char *ul_section = ul_term(callback_args->section, ta);
|
|
char *ul_name = ul_term(callback_args->name, ta);
|
|
char *ul_name_desc = ul_term(callback_args->name_desc, ta);
|
|
callback_args->section = ul_section;
|
|
callback_args->name = ul_name;
|
|
callback_args->name_desc = ul_name_desc;
|
|
callback_args->other_data = orig_data->data;
|
|
(orig_data->callback)(callback_args);
|
|
free(ul_section);
|
|
free(ul_name);
|
|
free(ul_name_desc);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* run_query_pager --
|
|
* Utility function similar to run_query_html. This function tries to
|
|
* pre-process the result assuming it will be piped to a pager.
|
|
* For this purpose it first calls its own callback function callback_pager
|
|
* which then delegates the call to the user supplied callback.
|
|
*/
|
|
static int
|
|
run_query_pager(sqlite3 *db, query_args *args)
|
|
{
|
|
struct orig_callback_data orig_data;
|
|
orig_data.callback = args->callback;
|
|
orig_data.data = args->callback_data;
|
|
const char *snippet_args[3] = { "\002", "\003", "..." };
|
|
args->callback = &callback_pager;
|
|
args->callback_data = (void *) &orig_data;
|
|
return run_query_internal(db, snippet_args, args);
|
|
}
|
|
|
|
/*
 * Output cursor used by term_putc(): s is the destination buffer and l
 * the number of bytes stored in it so far.
 */
struct nv {
	char *s;
	size_t l;
};
|
|
|
|
static int
|
|
term_putc(int c, void *p)
|
|
{
|
|
struct nv *nv = p;
|
|
nv->s[nv->l++] = c;
|
|
return 0;
|
|
}
|
|
|
|
static char *
|
|
term_fix_seq(TERMINAL *ti, const char *seq)
|
|
{
|
|
char *res = estrdup(seq);
|
|
struct nv nv;
|
|
|
|
if (ti == NULL)
|
|
return res;
|
|
|
|
nv.s = res;
|
|
nv.l = 0;
|
|
ti_puts(ti, seq, 1, term_putc, &nv);
|
|
nv.s[nv.l] = '\0';
|
|
|
|
return res;
|
|
}
|
|
|
|
static void
|
|
term_init(int fd, const char *sa[5])
|
|
{
|
|
TERMINAL *ti;
|
|
int error;
|
|
const char *bold, *sgr0, *smso, *rmso, *smul, *rmul;
|
|
|
|
if (ti_setupterm(&ti, NULL, fd, &error) == -1) {
|
|
bold = sgr0 = NULL;
|
|
smso = rmso = smul = rmul = "";
|
|
ti = NULL;
|
|
} else {
|
|
bold = ti_getstr(ti, "bold");
|
|
sgr0 = ti_getstr(ti, "sgr0");
|
|
if (bold == NULL || sgr0 == NULL) {
|
|
smso = ti_getstr(ti, "smso");
|
|
|
|
if (smso == NULL ||
|
|
(rmso = ti_getstr(ti, "rmso")) == NULL)
|
|
smso = rmso = "";
|
|
bold = sgr0 = NULL;
|
|
} else
|
|
smso = rmso = "";
|
|
|
|
smul = ti_getstr(ti, "smul");
|
|
if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL)
|
|
smul = rmul = "";
|
|
}
|
|
|
|
sa[0] = term_fix_seq(ti, bold ? bold : smso);
|
|
sa[1] = term_fix_seq(ti, sgr0 ? sgr0 : rmso);
|
|
sa[2] = estrdup("...");
|
|
sa[3] = term_fix_seq(ti, smul);
|
|
sa[4] = term_fix_seq(ti, rmul);
|
|
|
|
if (ti)
|
|
del_curterm(ti);
|
|
}
|
|
|
|
/*
|
|
* run_query_term --
|
|
* Utility function similar to run_query_html. This function tries to
|
|
* pre-process the result assuming it will be displayed on a terminal
|
|
* For this purpose it first calls its own callback function callback_pager
|
|
* which then delegates the call to the user supplied callback.
|
|
*/
|
|
static int
|
|
run_query_term(sqlite3 *db, query_args *args)
|
|
{
|
|
struct orig_callback_data orig_data;
|
|
struct term_args ta;
|
|
orig_data.callback = args->callback;
|
|
orig_data.data = args->callback_data;
|
|
const char *snippet_args[5];
|
|
|
|
term_init(STDOUT_FILENO, snippet_args);
|
|
ta.smul = snippet_args[3];
|
|
ta.rmul = snippet_args[4];
|
|
ta.orig_data = (void *) &orig_data;
|
|
|
|
args->callback = &callback_term;
|
|
args->callback_data = &ta;
|
|
return run_query_internal(db, snippet_args, args);
|
|
}
|
|
|
|
static int
|
|
run_query_none(sqlite3 *db, query_args *args)
|
|
{
|
|
struct orig_callback_data orig_data;
|
|
orig_data.callback = args->callback;
|
|
orig_data.data = args->callback_data;
|
|
const char *snippet_args[3] = { "", "", "..." };
|
|
args->callback = &callback_pager;
|
|
args->callback_data = (void *) &orig_data;
|
|
return run_query_internal(db, snippet_args, args);
|
|
}
|
|
|
|
int
|
|
run_query(sqlite3 *db, query_format fmt, query_args *args)
|
|
{
|
|
switch (fmt) {
|
|
case APROPOS_NONE:
|
|
return run_query_none(db, args);
|
|
case APROPOS_HTML:
|
|
return run_query_html(db, args);
|
|
case APROPOS_TERM:
|
|
return run_query_term(db, args);
|
|
case APROPOS_PAGER:
|
|
return run_query_pager(db, args);
|
|
default:
|
|
warnx("Unknown query format %d", (int)fmt);
|
|
return -1;
|
|
}
|
|
}
|