Table of contents

Notes Query

%3 cluster_a2643095_f570_492e_9313_4587fdd712da Notes Query cluster_4aaaa691_8737_4f34_b527_5cc6c7086570 Index generator code _88cd5a42_48cf_4fb9_ac5d_e6928ea55427 Dependencies _12ed7694_66ce_49fb_90a8_0fccf986f6e3 Comment index generator code _c5c138df_4d79_442f_933b_db3e4bde6e84 Config _0d320e34_d452_40aa_b736_7d1e0c2e6323 Definitions _36b3ba0d_8965_4faa_a25f_bc08455f4582 Python code _65b53b3b_2af6_451e_a639_b303f842c474 SQLite FTS5 _36b3ba0d_8965_4faa_a25f_bc08455f4582->_65b53b3b_2af6_451e_a639_b303f842c474 _608e00b5_ba2f_4f8a_b6a5_944c8bbe00ee Emacs code _6d0ce466_fd19_4509_8c1f_adc4c1f7b1f9 Search function _e5f5d341_9148_4b6c_8b5a_9aff3a2912e4 Xapian _6d0ce466_fd19_4509_8c1f_adc4c1f7b1f9->_e5f5d341_9148_4b6c_8b5a_9aff3a2912e4 _a9c8dc6d_e1bc_4d20_b403_4d0496fc964d Invoke from Emacs _a9c8dc6d_e1bc_4d20_b403_4d0496fc964d->_6d0ce466_fd19_4509_8c1f_adc4c1f7b1f9 _9a947501_cbf3_485d_b6fd_5db4eea76aca Conquering Kubernetes with Emacs _a9c8dc6d_e1bc_4d20_b403_4d0496fc964d->_9a947501_cbf3_485d_b6fd_5db4eea76aca _a9c8dc6d_e1bc_4d20_b403_4d0496fc964d->__0:cluster_a2643095_f570_492e_9313_4587fdd712da _217f35ce_404e_49ed_bfd3_cb3ca3f312db personal wiki _aa29be89_70e7_4465_91ed_361cf0ce62f2 Emacs _9a947501_cbf3_485d_b6fd_5db4eea76aca->_aa29be89_70e7_4465_91ed_361cf0ce62f2 __1:cluster_a2643095_f570_492e_9313_4587fdd712da->_6d0ce466_fd19_4509_8c1f_adc4c1f7b1f9 __2:cluster_a2643095_f570_492e_9313_4587fdd712da->_65b53b3b_2af6_451e_a639_b303f842c474 __3:cluster_a2643095_f570_492e_9313_4587fdd712da->_e5f5d341_9148_4b6c_8b5a_9aff3a2912e4 __4:cluster_a2643095_f570_492e_9313_4587fdd712da->_217f35ce_404e_49ed_bfd3_cb3ca3f312db __5:cluster_a2643095_f570_492e_9313_4587fdd712da->_aa29be89_70e7_4465_91ed_361cf0ce62f2 __6:cluster_a2643095_f570_492e_9313_4587fdd712da->_9a947501_cbf3_485d_b6fd_5db4eea76aca

Definitions

;; Settings used by `smp/search-notes': the interpreter to launch, the Python
;; script it runs, and the SQLite FTS5 index file that script queries.
(setq smp/nq--python-interpreter "python3"
      smp/nq--python-module (file-truename "~/repos/codigoparallevar/scripts/notes-query")
      smp/nq--sqlitefts-index (file-truename "~/.cache/plibrarian/db.sqlite3"))

Run (org-babel-tangle) (C-c C-v t) to get the code placed in that path.

Python code

  • This module makes calls to a small python wrapper for SQLite FTS5.

#!/usr/bin/env python3

import sqlite3
import sys
import os
import json

def main(mode, path, query):
    """Run a full-text query against the notes index and print the matches.

    Args:
        mode: Output format. ``'json'`` prints one JSON array holding an
            object per match; ``'org'`` prints one numbered Org-mode ``id:``
            link per line. Any other value prints nothing.
        path: Path of the SQLite database holding the ``note_search`` table.
        query: Full-text query string; it is passed as a bound parameter.
    """
    db = sqlite3.connect(path)
    try:
        cur = db.cursor()
        cur.execute('SELECT note_id, title, top_level_title, is_done, is_todo FROM note_search(?);', (query,))
        matches = cur.fetchall()
    finally:
        # All rows are in memory at this point, so release the database file
        # instead of leaving the connection open until interpreter exit.
        db.close()

    if mode == 'json':
        results = [
            dict(rank=rank,
                 note_id=note_id,
                 title=title,
                 top_level_title=top_level_title,
                 is_done=bool(is_done),
                 is_todo=bool(is_todo),
                 )
            for rank, (note_id, title, top_level_title, is_done, is_todo)
            in enumerate(matches, start=1)
        ]
        print(json.dumps(results))
    elif mode == 'org':
        for rank, (note_id, title, top_level_title, _is_done, _is_todo) in enumerate(matches, start=1):
            print(f"{rank:2d}) [[id:{note_id}][{title.strip()} ({top_level_title})]]")

if __name__ == '__main__':
    # Expected invocation: <script> <json|org> <path> <query>
    if len(sys.argv) != 4 or sys.argv[1] not in ('json', 'org'):
        print("Notes-Query")
        print("Usage: {} <json|org> <path> <query>".format(sys.argv[0]))
        # A malformed invocation is an error, so report a non-zero status.
        # sys.exit is used because the bare `exit` helper is only installed
        # by the `site` module and is not guaranteed to exist.
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], sys.argv[3])

  • This code takes a mode (json or org) as its first argument, the path to the SQLite FTS5 database as its second, and a query as its third.

Emacs code

(defun smp/search-notes (query)
    "Search the notes index for QUERY and show the matches in *notes-query*.

Interactively, prompt for QUERY.  The buffer is emptied, given a heading
made from QUERY, and a process is started that runs
`smp/nq--python-module' with `smp/nq--python-interpreter' in org output
mode against `smp/nq--sqlitefts-index', sending its output to the buffer.
Finally the buffer is put in `org-mode' and made read-only."
    (interactive "sQuery? ")
    (let ((results-name "*notes-query*"))
      (switch-to-buffer results-name)
      ;; Make the buffer writable so it can be reset.
      (read-only-mode -1)
      (erase-buffer)
      (insert "* " query "\n")
      (backward-page)
      ;; The process is named after the buffer that receives its output.
      (start-process results-name results-name
                     smp/nq--python-interpreter
                     smp/nq--python-module
                     "org"
                     smp/nq--sqlitefts-index
                     query)
      (org-mode)
      (read-only-mode 1)))

: smp/search-notes

Index generator code

#!/usr/bin/env python3

import os
import sys
import collections
import logging
import json
from datetime import datetime

import sqlite3
import shutil

import inotify.adapters
import org_rw

import xdg

# Settings read from the JSON configuration file; `path` is the directory that
# gets loaded, indexed and watched.
Config = collections.namedtuple('Config', 'path')

# The search index is stored under the XDG cache directory.
DB_PATH = os.path.join(xdg.XDG_CACHE_HOME, "plibrarian", "db.sqlite3")

# The configuration file is stored under the XDG config directory.
BASE_DIR = os.path.join(xdg.XDG_CONFIG_HOME, "plibrarian")
CONFIG_PATH = os.path.join(BASE_DIR, "config.json")

# inotify event names that cause the affected path to be re-indexed.
# IN_MODIFY is not monitored.
# NOTE(review): presumably because IN_CLOSE_WRITE already fires once a write
# has finished — confirm.
MONITORED_EVENT_TYPES = (
    'IN_CREATE',       # entry created inside a watched directory
    'IN_CLOSE_WRITE',  # file that was open for writing got closed
    'IN_DELETE',       # entry deleted from a watched directory
    'IN_MOVED_FROM',   # entry renamed/moved out of a watched directory
    'IN_MOVED_TO',     # entry renamed/moved into a watched directory
    'IN_DELETE_SELF',  # the watched entry itself was deleted
    'IN_MOVE_SELF',    # the watched entry itself was moved
)


def create_db(path):
    """Create a fresh, empty search index at *path* and return its connection.

    Any file already present at *path* is removed first, so the returned
    index always starts empty. Missing parent directories are created.

    Args:
        path: Filesystem location of the SQLite database file.

    Returns:
        An open ``sqlite3.Connection`` containing an empty ``note_search``
        FTS5 table that uses the trigram tokenizer.
    """
    if os.path.exists(path):
        os.unlink(path)

    parent = os.path.dirname(path)
    # A bare filename has an empty directory part, and os.makedirs('') raises
    # FileNotFoundError; exist_ok already covers the "directory exists" case.
    if parent:
        os.makedirs(parent, exist_ok=True)

    db = sqlite3.connect(path)
    db.execute('CREATE VIRTUAL TABLE note_search USING fts5(note_id, title, body, doc_id, top_level_title, is_done, is_todo, tokenize="trigram");')
    return db

def add_to_index(doc, cur):
    """Insert one ``note_search`` row per headline of *doc* through *cur*.

    Each row holds the headline's id and stripped title, the newline-joined
    output of ``dump_headline(..., recursive=False)`` for it, the owning
    document's id, the stripped title of its top-most ancestor headline, and
    its ``is_done`` / ``is_todo`` flags.

    Args:
        doc: Loaded document exposing ``getAllHeadlines()``.
        cur: Database cursor used for the inserts; nothing is committed here.
    """
    for headline in doc.getAllHeadlines():
        # Climb the parent chain until the parent is no longer a headline.
        root = headline
        while isinstance(root.parent, org_rw.Headline):
            root = root.parent

        row = (
            headline.id,
            headline.title.get_text().strip(),
            '\n'.join(headline.doc.dump_headline(headline, recursive=False)),
            headline.doc.id,
            root.title.get_text().strip(),
            headline.is_done,
            headline.is_todo,
        )
        cur.execute('''INSERT INTO note_search(note_id, title, body, doc_id, top_level_title, is_done, is_todo) VALUES (?, ?, ?, ?, ?, ?, ?);''',
                    row)

def load_config():
    """Read the JSON configuration file, if it exists.

    Returns:
        A ``Config`` whose ``path`` is the file's ``"path"`` entry with ``~``
        expanded, or ``None`` when ``CONFIG_PATH`` does not exist.
    """
    if os.path.exists(CONFIG_PATH):
        with open(CONFIG_PATH, 'rt') as config_file:
            settings = json.load(config_file)

        notes_dir = os.path.expanduser(settings['path'])
        return Config(path=notes_dir)

    return None


def load_all(top_dir_abs):
    """Load every Org document found under *top_dir_abs*.

    The tree is walked recursively and every file whose name contains
    ``.org`` is parsed with ``org_rw.load(..., extra_cautious=True)``.

    Args:
        top_dir_abs: Directory to walk.

    Returns:
        A dict mapping each loaded file's path to its parsed document.

    If any file fails to load, the traceback and the offending path are
    printed and the process exits with status 1.
    """
    docs = {}

    for root, _dirs, files in os.walk(top_dir_abs):
        for name in files:
            # NOTE(review): this is a substring test, so names such as
            # "notes.org~" or "x.org.bak" are loaded too — confirm whether
            # name.endswith(".org") is what is wanted.
            if ".org" not in name:
                continue

            path = os.path.join(root, name)
            try:
                # Close each file as soon as it has been parsed rather than
                # leaking one open handle per note.
                with open(path) as f:
                    docs[path] = org_rw.load(f, extra_cautious=True)
            except Exception:
                import traceback

                traceback.print_exc()
                print(f"== On {path}")
                sys.exit(1)

    return docs


def gen_initial(docs):
    """Build the search index from scratch out of the loaded *docs*.

    Args:
        docs: Mapping of file path to loaded document.

    Returns:
        The open database connection created at ``DB_PATH``, with every
        document's rows inserted and committed.
    """
    connection = create_db(DB_PATH)

    cursor = connection.cursor()
    for document in docs.values():
        add_to_index(document, cursor)
    cursor.close()

    connection.commit()
    return connection

def update_index(docs, fpath, db):
    """Bring the index and the *docs* cache up to date for one changed path.

    If *fpath* exists it is reloaded and stored in *docs*; if it no longer
    exists it is dropped from *docs*. The index is then either patched in
    place (only this document's rows are replaced) or rebuilt from *docs*.

    Args:
        docs: Mapping of file path to loaded document; updated in place.
        fpath: Path of the file that changed.
        db: Open database connection holding the ``note_search`` table.

    A document that fails to load with ``org_rw.NonReproducibleDocument`` is
    logged and skipped, leaving both *docs* and the index untouched.
    """
    previous = docs.get(fpath)

    if os.path.exists(fpath):
        try:
            with open(fpath, 'rt') as f:
                doc = org_rw.load(f, extra_cautious=True)
        except org_rw.NonReproducibleDocument:
            logging.warning("Error loading document")
            return
        # Keep the cache current so later full rebuilds see this version.
        docs[fpath] = doc
    else:
        # The file is gone: forget it so a rebuild no longer includes it.
        doc = None
        docs.pop(fpath, None)

    # Rows can only be replaced selectively when they are addressable by a
    # document id that did not change between the cached and reloaded versions.
    can_patch = (
        doc is not None
        and doc.id is not None
        and (previous is None or previous.id == doc.id)
    )

    cur = db.cursor()
    if can_patch:
        # Delete old info
        cur.execute('DELETE FROM note_search WHERE doc_id = ?;', (doc.id,))
        logging.debug("Deleted docid: {}".format(doc.id))
        # Create new info
        add_to_index(doc, cur)
    else:
        # Deleted file, document without id, or changed id: rebuild everything
        # from the cache so no stale rows survive.
        cur.execute('DELETE FROM note_search;')
        for known_doc in docs.values():
            add_to_index(known_doc, cur)

    cur.close()
    db.commit()


def event_loop(notifier, docs, db):
    """Consume filesystem events and keep the index in sync with the notes.

    Paths touched by monitored events are collected; whenever the notifier
    yields ``None`` the collected paths are re-indexed and committed.

    Args:
        notifier: Object whose ``event_gen(yield_nones=True)`` yields either
            ``None`` or ``(header, type_names, directory, filename)`` tuples.
        docs: Mapping of file path to loaded document, passed to
            ``update_index``.
        db: Open database connection holding the index.
    """
    file_changes = set()
    for event in notifier.event_gen(yield_nones=True):
        if event is None:
            # No event to report right now: flush whatever has accumulated.
            if file_changes:
                for fpath in file_changes:
                    logging.info("Reloading {}".format(fpath))
                    update_index(docs, fpath, db)
                file_changes = set()

                # Commit all info to the SQLite index
                db.commit()

            continue

        (_header, types, directory, file) = event
        if not any(event_type in MONITORED_EVENT_TYPES for event_type in types):
            continue
        filepath = os.path.join(directory, file)
        if '/.git/' in filepath or os.path.basename(filepath).startswith('.'):
            # Ignoring GIT or hidden files
            continue
        if ".org" not in file:
            # Same name filter load_all() applies. Without it, events for
            # directories (empty file name) or unrelated files would be
            # handed to the Org parser and could stop the watcher.
            continue

        # Mark path to update when next "commit" comes
        file_changes.add(filepath)

def main():
    """Load the notes, build the search index and watch for changes.

    Exits with status 1 when no configuration file is found. Otherwise the
    function does not return until ``event_loop`` finishes.
    """
    config = load_config()
    if config is None:
        logging.error("Configuration not found on: {}".format(CONFIG_PATH))
        # A missing configuration is a failure, so signal it to the caller.
        # sys.exit is used because bare `exit` is only provided by `site`.
        sys.exit(1)

    # Start watching before the initial load so that changes made while the
    # files are being read are queued rather than missed.
    notifier = inotify.adapters.InotifyTree(config.path)

    logging.info("Loading files...")
    docs = load_all(config.path)
    logging.info("{} files loaded".format(len(docs)))

    logging.info("Creating index...")
    db = gen_initial(docs)
    logging.info("Index created")

    logging.info("Starting event loop")
    event_loop(notifier, docs, db)


if __name__ == '__main__':
    # INFO level makes the progress messages above visible.
    logging.basicConfig(level=logging.INFO)
    main()

Dependencies

# Provides `inotify.adapters`, imported by the index generator.
pip install inotify
# Presumably provides `org_rw`, imported by the index generator — confirm.
pip install git+https://code.codigoparallevar.com/kenkeiras/org-rw@dev/render-as-dom
# NOTE(review): the index generator also imports `xdg` (it reads
# xdg.XDG_CONFIG_HOME and xdg.XDG_CACHE_HOME); neither command above names a
# package for it — confirm which one is required.

Config

{
    "path": "~/.logs/brain"
}