registry.el 16.5 KB
Newer Older
Ted Zlatanov's avatar
Ted Zlatanov committed
1 2
;;; registry.el --- Track and remember data items by various fields

3
;; Copyright (C) 2011  Free Software Foundation, Inc.
Ted Zlatanov's avatar
Ted Zlatanov committed
4 5 6 7

;; Author: Teodor Zlatanov <tzz@lifelogs.com>
;; Keywords: data

8 9 10
;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
Ted Zlatanov's avatar
Ted Zlatanov committed
11 12 13 14
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

15
;; GNU Emacs is distributed in the hope that it will be useful,
Ted Zlatanov's avatar
Ted Zlatanov committed
16 17 18 19 20
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
21
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
Ted Zlatanov's avatar
Ted Zlatanov committed
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

;;; Commentary:

;; This library provides a general-purpose EIEIO-based registry
;; database with persistence, initialized with these fields:

;; version: a float, 0.1 currently (don't change it)

;; max-hard: an integer, default 5000000

;; max-soft: an integer, default 50000

;; precious: a list of symbols

;; tracked: a list of symbols

;; tracker: a hashtable tuned for 100 symbols to track (you should
;; only access this with the :lookup2-function and the
;; :lookup2+-function)

;; data: a hashtable with default size 10K and resize threshold 2.0
;; (this reflects the expected usage so override it if you know better)

;; ...plus methods to do all the work: `registry-search',
;; `registry-lookup', `registry-lookup-secondary',
;; `registry-lookup-secondary-value', `registry-insert',
;; `registry-delete', `registry-prune', `registry-size' which see

;; and with the following properties:

;; Every piece of data has a unique ID and some general-purpose fields
;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.

;; ((F1 D1) (F2 D2) (F3 a b c))

;; Note that whether a field has one or many pieces of data, the data
;; is always a list of values.

;; The user decides which fields are "precious", F2 for example.  At
;; PRUNE TIME (when the :prune-function is called), the registry will
;; trim any entries without the F2 field until the size is :max-soft
;; or less.  No entries with the F2 field will be removed at PRUNE
;; TIME.

;; When an entry is inserted, the registry will reject new entries
;; if they bring it over the max-hard limit, even if they have the F2
;; field.

;; The user decides which fields are "tracked", F1 for example.  Any
;; new entry is then indexed by all the tracked fields so it can be
;; quickly looked up that way.  The data is always a list (see example
;; above) and each list element is indexed.

;; Precious and tracked field names must be symbols.  All other
;; fields can be any other Emacs Lisp types.

;;; Code:

80 81
(eval-when-compile (require 'cl))

82
(eval-when-compile
83
  (when (null (ignore-errors (require 'ert)))
84 85
    (defmacro* ert-deftest (name () &body docstring-keys-and-body))))

86 87
(ignore-errors
  (require 'ert))
Ted Zlatanov's avatar
Ted Zlatanov committed
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
(eval-and-compile
  (or (ignore-errors (progn
                       (require 'eieio)
                       (require 'eieio-base)))
      ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib
      (ignore-errors
        (let ((load-path (cons (expand-file-name
                                "gnus-fallback-lib/eieio"
                                (file-name-directory (locate-library "gnus")))
                               load-path)))
          (require 'eieio)
          (require 'eieio-base)))
      (error
       "eieio not found in `load-path' or gnus-fallback-lib/ directory.")))

(defclass registry-db (eieio-persistent)
  ((version :initarg :version
            :initform 0.1
            :type float
            :custom float
            :documentation "The registry version.")
   (max-hard :initarg :max-hard
             :initform 5000000
             :type integer
             :custom integer
             :documentation "Never accept more than this many elements.")
   (max-soft :initarg :max-soft
             :initform 50000
             :type integer
             :custom integer
             :documentation "Prune as much as possible to get to this size.")
   (tracked :initarg :tracked
            :initform nil
            :type t
            :documentation "The tracked (indexed) fields, a list of symbols.")
   (precious :initarg :precious
             :initform nil
             :type t
             :documentation "The precious fields, a list of symbols.")
   (tracker :initarg :tracker
            :type hash-table
            :documentation "The field tracking hashtable.")
   (data :initarg :data
         :type hash-table
         :documentation "The data hashtable.")))

134 135 136 137 138 139 140 141 142 143 144 145
(eval-and-compile
  (defmethod initialize-instance :AFTER ((this registry-db) slots)
    "Set value of data slot of THIS after initialization."
    (with-slots (data tracker) this
      (unless (member :data slots)
	(setq data
	      (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
      (unless (member :tracker slots)
	(setq tracker (make-hash-table :size 100 :rehash-size 2.0)))))

  (defmethod registry-lookup ((db registry-db) keys)
    "Search for KEYS in the registry-db THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
146
Returns a alist of the key followed by the entry in a list, not a cons cell."
147 148 149 150 151 152 153 154 155 156
    (let ((data (oref db :data)))
      (delq nil
	    (mapcar
	     (lambda (k)
	       (when (gethash k data)
		 (list k (gethash k data))))
	     keys))))

  (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
    "Search for KEYS in the registry-db THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
157
Returns a alist of the key followed by the entry in a list, not a cons cell."
158 159 160 161 162 163 164 165 166
    (let ((data (oref db :data)))
      (delq nil
	    (loop for key in keys
		  when (gethash key data)
		  collect (list key (gethash key data))))))

  (defmethod registry-lookup-secondary ((db registry-db) tracksym
					&optional create)
    "Search for TRACKSYM in the registry-db THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
167
When CREATE is not nil, create the secondary index hashtable if needed."
168 169 170 171 172 173 174 175 176 177 178 179
    (let ((h (gethash tracksym (oref db :tracker))))
      (if h
	  h
	(when create
	  (puthash tracksym
		   (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
		   (oref db :tracker))
	  (gethash tracksym (oref db :tracker))))))

  (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
					      &optional set)
    "Search for TRACKSYM with value VAL in the registry-db THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
180
When SET is not nil, set it for VAL (use t for an empty list)."
181 182 183 184 185 186 187
    ;; either we're asked for creation or there should be an existing index
    (when (or set (registry-lookup-secondary db tracksym))
      ;; set the entry if requested,
      (when set
	(puthash val (if (eq t set) '() set)
		 (registry-lookup-secondary db tracksym t)))
      (gethash val (registry-lookup-secondary db tracksym)))))
Ted Zlatanov's avatar
Ted Zlatanov committed
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208

(defun registry--match (mode entry check-list)
  ;; for all members
  (when check-list
    (let ((key (nth 0 (nth 0 check-list)))
          (vals (cdr-safe (nth 0 check-list)))
          found)
      (while (and key vals (not found))
        (setq found (case mode
                      (:member
                       (member (car-safe vals) (cdr-safe (assoc key entry))))
                      (:regex
                       (string-match (car vals)
                                     (mapconcat
                                      'prin1-to-string
                                      (cdr-safe (assoc key entry))
                                      "\0"))))
              vals (cdr-safe vals)))
      (or found
          (registry--match mode entry (cdr-safe check-list))))))

209 210 211
(eval-and-compile
  (defmethod registry-search ((db registry-db) &rest spec)
    "Search for SPEC across the registry-db THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
212 213 214 215
For example calling with :member '(a 1 2) will match entry '((a 3 1)).
Calling with :all t (any non-nil value) will match all.
Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\").
The test order is to check :all first, then :member, then :regex."
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
    (when db
      (let ((all (plist-get spec :all))
	    (member (plist-get spec :member))
	    (regex (plist-get spec :regex)))
	(loop for k being the hash-keys of (oref db :data)
	      using (hash-values v)
	      when (or
		    ;; :all non-nil returns all
		    all
		    ;; member matching
		    (and member (registry--match :member v member))
		    ;; regex matching
		    (and regex (registry--match :regex v regex)))
	      collect k))))

  (defmethod registry-delete ((db registry-db) keys assert &rest spec)
    "Delete KEYS from the registry-db THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
233 234 235
If KEYS is nil, use SPEC to do a search.
Updates the secondary ('tracked') indices as well.
With assert non-nil, errors out if the key does not exist already."
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
    (let* ((data (oref db :data))
	   (keys (or keys
		     (apply 'registry-search db spec)))
	   (tracked (oref db :tracked)))

      (dolist (key keys)
	(let ((entry (gethash key data)))
	  (when assert
	    (assert entry nil
		    "Key %s does not exists in database" key))
	  ;; clean entry from the secondary indices
	  (dolist (tr tracked)
	    ;; is this tracked symbol indexed?
	    (when (registry-lookup-secondary db tr)
	      ;; for every value in the entry under that key...
	      (dolist (val (cdr-safe (assq tr entry)))
		(let* ((value-keys (registry-lookup-secondary-value
				    db tr val)))
		  (when (member key value-keys)
		    ;; override the previous value
		    (registry-lookup-secondary-value
		     db tr val
		     ;; with the indexed keys MINUS the current key
		     ;; (we pass t when the list is empty)
		     (or (delete key value-keys) t)))))))
	  (remhash key data)))
      keys))

  (defmethod registry-insert ((db registry-db) key entry)
    "Insert ENTRY under KEY into the registry-db THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
266 267 268
Updates the secondary ('tracked') indices as well.
Errors out if the key exists already."

269 270 271 272 273 274
    (assert (not (gethash key (oref db :data))) nil
	    "Key already exists in database")

    (assert (< (registry-size db)
	       (oref db :max-hard))
	    nil
275
	    "registry max-hard size limit reached")
276 277 278 279 280

    ;; store the entry
    (puthash key entry (oref db :data))

    ;; store the secondary indices
281
    (dolist (tr (oref db :tracked))
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
      ;; for every value in the entry under that key...
      (dolist (val (cdr-safe (assq tr entry)))
	(let* ((value-keys (registry-lookup-secondary-value db tr val)))
	  (pushnew key value-keys :test 'equal)
	  (registry-lookup-secondary-value db tr val value-keys))))
    entry)

  (defmethod registry-reindex ((db registry-db))
    "Rebuild the secondary indices of registry-db THIS."
    (let ((count 0)
	  (expected (* (length (oref db :tracked)) (registry-size db))))
      (dolist (tr (oref db :tracked))
	(let (values)
	  (maphash
	   (lambda (key v)
	     (incf count)
	     (when (and (< 0 expected)
			(= 0 (mod count 1000)))
	       (message "reindexing: %d of %d (%.2f%%)"
301
			count expected (/ (* 100 count) expected)))
302 303 304 305 306 307 308 309
	     (dolist (val (cdr-safe (assq tr v)))
	       (let* ((value-keys (registry-lookup-secondary-value db tr val)))
		 (push key value-keys)
		 (registry-lookup-secondary-value db tr val value-keys))))
	   (oref db :data))))))

  (defmethod registry-size ((db registry-db))
    "Returns the size of the registry-db object THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
310
This is the key count of the :data slot."
311
    (hash-table-count (oref db :data)))
Ted Zlatanov's avatar
Ted Zlatanov committed
312

313 314
  (defmethod registry-prune ((db registry-db))
    "Prunes the registry-db object THIS.
Ted Zlatanov's avatar
Ted Zlatanov committed
315
Removes only entries without the :precious keys."
316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
    (let* ((precious (oref db :precious))
	   (precious-p (lambda (entry-key)
			 (cdr (memq (car entry-key) precious))))
	   (data (oref db :data))
	   (limit (oref db :max-soft))
	   (size (registry-size db))
	   (candidates (loop for k being the hash-keys of data
			     using (hash-values v)
			     when (notany precious-p v)
			     collect k))
	   (candidates-count (length candidates))
	   ;; are we over max-soft?
	   (prune-needed (> size limit)))

      ;; while we have more candidates than we need to remove...
      (while (and (> candidates-count (- size limit)) candidates)
	(decf candidates-count)
	(setq candidates (cdr candidates)))

      (registry-delete db candidates nil))))
Ted Zlatanov's avatar
Ted Zlatanov committed
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384

(ert-deftest registry-instantiation-test ()
  (should (registry-db "Testing")))

(ert-deftest registry-match-test ()
  (let ((entry '((hello "goodbye" "bye") (blank))))

    (message "Testing :regex matching")
    (should (registry--match :regex entry '((hello "nye" "bye"))))
    (should (registry--match :regex entry '((hello "good"))))
    (should-not (registry--match :regex entry '((hello "nye"))))
    (should-not (registry--match :regex entry '((hello))))

    (message "Testing :member matching")
    (should (registry--match :member entry '((hello "bye"))))
    (should (registry--match :member entry '((hello "goodbye"))))
    (should-not (registry--match :member entry '((hello "good"))))
    (should-not (registry--match :member entry '((hello "nye"))))
    (should-not (registry--match :member entry '((hello)))))
  (message "Done with matching testing."))

(defun registry-make-testable-db (n &optional name file)
  (let* ((db (registry-db
              (or name "Testing")
              :file (or file "unused")
              :max-hard n
              :max-soft 0               ; keep nothing not precious
              :precious '(extra more-extra)
              :tracked '(sender subject groups))))
    (dotimes (i n)
      (registry-insert db i `((sender "me")
                              (subject "about you")
                              (more-extra) ; empty data key should be pruned
                              ;; first 5 entries will NOT have this extra data
                              ,@(when (< 5 i) (list (list 'extra "more data")))
                              (groups ,(number-to-string i)))))
    db))

(ert-deftest registry-usage-test ()
  (let* ((n 100)
         (db (registry-make-testable-db n)))
    (message "size %d" n)
    (should (= n (registry-size db)))
    (message "max-hard test")
    (should-error (registry-insert db "new" '()))
    (message "Individual lookup")
    (should (= 58 (caadr (registry-lookup db '(1 58 99)))))
    (message "Grouped individual lookup")
    (should (= 3 (length (registry-lookup db '(1 58 99)))))
385 386 387
    (when (boundp 'lexical-binding)
      (message "Individual lookup (breaks before lexbind)")
      (should (= 58
388
                 (caadr (registry-lookup-breaks-before-lexbind db '(1 58 99)))))
389 390
      (message "Grouped individual lookup (breaks before lexbind)")
      (should (= 3
391 392
                 (length (registry-lookup-breaks-before-lexbind db
                                                                '(1 58 99))))))
Ted Zlatanov's avatar
Ted Zlatanov committed
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
    (message "Search")
    (should (= n (length (registry-search db :all t))))
    (should (= n (length (registry-search db :member '((sender "me"))))))
    (message "Secondary index search")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    (should (equal '(74) (registry-lookup-secondary-value db 'groups "74")))
    (message "Delete")
    (should (registry-delete db '(1) t))
    (decf n)
    (message "Search after delete")
    (should (= n (length (registry-search db :all t))))
    (message "Secondary search after delete")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    (message "Pruning")
    (let* ((tokeep (registry-search db :member '((extra "more data"))))
           (count (- n (length tokeep)))
           (pruned (registry-prune db))
           (prune-count (length pruned)))
      (message "Expecting to prune %d entries and pruned %d"
               count prune-count)
      (should (and (= count 5)
                   (= count prune-count))))
    (message "Done with usage testing.")))

(ert-deftest registry-persistence-test ()
  (let* ((n 100)
         (tempfile (make-temp-file "registry-persistence-"))
         (name "persistence tester")
         (db (registry-make-testable-db n name tempfile))
         size back)
    (message "Saving to %s" tempfile)
    (eieio-persistent-save db)
    (setq size (nth 7 (file-attributes tempfile)))
    (message "Saved to %s: size %d" tempfile size)
    (should (< 0 size))
    (with-temp-buffer
      (insert-file-contents-literally tempfile)
      (should (looking-at (concat ";; Object "
                                  name
                                  "\n;; EIEIO PERSISTENT OBJECT"))))
    (message "Reading object back")
    (setq back (eieio-persistent-read tempfile))
    (should back)
    (message "Read object back: %d keys, expected %d==%d"
             (registry-size back) n (registry-size db))
    (should (= (registry-size back) n))
    (should (= (registry-size back) (registry-size db)))
    (delete-file tempfile))
  (message "Done with persistence testing."))

(provide 'registry)
;;; registry.el ends here