Table of contents

TODO

Archive links from Personal Wiki

task

%3 cluster_a6aa82b1_53df_4ca3_ac42_f0051be62151 Archive links from Personal Wiki _c48c34a3_2a84_4b48_9760_abe5e541ebaa Pull all links _0873a209_b9df_4bf3_b65e_2f425c5a2adc Librarian / Personal Library _217f35ce_404e_49ed_bfd3_cb3ca3f312db personal wiki _0873a209_b9df_4bf3_b65e_2f425c5a2adc->_217f35ce_404e_49ed_bfd3_cb3ca3f312db __0:cluster_a6aa82b1_53df_4ca3_ac42_f0051be62151->_0873a209_b9df_4bf3_b65e_2f425c5a2adc __1:cluster_a6aa82b1_53df_4ca3_ac42_f0051be62151->_217f35ce_404e_49ed_bfd3_cb3ca3f312db

TODO

Pull all links

poc

  • Using org-rw

    Working, but giving fewer results than expected 🤔

#!/usr/bin/env python3

import sys
import os
import logging

import org_rw
from org_rw import OrgTime, dom, Link
from org_rw import load as load_org


def load_all(top_dir_relative):
    """Load every Org document found under a directory tree.

    Walks ``top_dir_relative`` recursively and parses each file whose name
    contains ``.org`` with ``load_org`` (``extra_cautious=True``).

    Args:
        top_dir_relative: Directory to scan; made absolute before walking.

    Returns:
        A list with one loaded document per matching file, in walk order.

    If any file fails to load, the traceback and the offending path are
    printed and the process exits with status 1.
    """
    top = os.path.abspath(top_dir_relative)

    docs = []

    for root, _dirs, files in os.walk(top):
        for name in files:
            # Substring match, kept as-is: this also selects names such as
            # "notes.org.bak", not only files ending in ".org".
            if ".org" not in name:
                continue

            path = os.path.join(root, name)

            try:
                # Use a context manager so the handle is closed right after
                # parsing instead of leaking one descriptor per file.
                with open(path) as org_file:
                    doc = load_org(org_file, extra_cautious=True)
            except Exception:
                # Imported lazily: only needed on the failure path.
                import traceback

                traceback.print_exc()
                print(f"== On {path}")
                sys.exit(1)

            docs.append(doc)

    logging.info("Collected {} files".format(len(docs)))
    return docs


def main(src_top):
    """Print the value of every link starting with ``http`` under ``src_top``.

    Documents are obtained from ``load_all``; each matching link value is
    written to stdout on its own line.
    """
    for document in load_all(src_top):
        for entry in document.get_links():
            # Skip anything that is not an http(s)-style link.
            if not entry.value.startswith('http'):
                continue
            print("{}".format(entry.value))


if __name__ == "__main__":
    # Exactly one argument is expected: the top directory to scan.
    if len(sys.argv) != 2:
        # A usage error is a failure: report it on stderr and exit non-zero
        # so shells and calling scripts do not mistake it for success.
        print("Usage: {} SOURCE_TOP".format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    # main() has no explicit return value; sys.exit(None) exits with status 0.
    sys.exit(main(sys.argv[1]))
  • Using regex

    Even fewer…

# Recursively extract http(s) URLs from the text files under ~/.logs/brain
# and print them sorted and de-duplicated.
# NOTE: the pattern ends with a literal space, so only URLs followed by a
# space match; URLs at end of line or directly followed by ']' are skipped.
grep -PRIoh 'https?://[^\]\\ ]+ ' ~/.logs/brain|sort|uniq