registry.el 13.9 KB
Newer Older
Ted Zlatanov's avatar
Ted Zlatanov committed
1 2
;;; registry.el --- Track and remember data items by various fields

Paul Eggert's avatar
Paul Eggert committed
3
;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
Ted Zlatanov's avatar
Ted Zlatanov committed
4 5 6 7

;; Author: Teodor Zlatanov <tzz@lifelogs.com>
;; Keywords: data

8 9 10
;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
Ted Zlatanov's avatar
Ted Zlatanov committed
11 12 13 14
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

15
;; GNU Emacs is distributed in the hope that it will be useful,
Ted Zlatanov's avatar
Ted Zlatanov committed
16 17 18 19 20
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
21
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
Ted Zlatanov's avatar
Ted Zlatanov committed
22 23 24 25 26 27

;;; Commentary:

;; This library provides a general-purpose EIEIO-based registry
;; database with persistence, initialized with these fields:

28
;; version: a float
Ted Zlatanov's avatar
Ted Zlatanov committed
29

30
;; max-size: an integer, default most-positive-fixnum
Ted Zlatanov's avatar
Ted Zlatanov committed
31

32
;; prune-factor: a float between 0 and 1, default 0.1
Ted Zlatanov's avatar
Ted Zlatanov committed
33 34 35 36 37

;; precious: a list of symbols

;; tracked: a list of symbols

Paul Eggert's avatar
Paul Eggert committed
38
;; tracker: a hash table tuned for 100 symbols to track (you should
Ted Zlatanov's avatar
Ted Zlatanov committed
39 40 41
;; only access this with the :lookup2-function and the
;; :lookup2+-function)

Paul Eggert's avatar
Paul Eggert committed
42
;; data: a hash table with default size 10K and resize threshold 2.0
Ted Zlatanov's avatar
Ted Zlatanov committed
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
;; (this reflects the expected usage so override it if you know better)

;; ...plus methods to do all the work: `registry-search',
;; `registry-lookup', `registry-lookup-secondary',
;; `registry-lookup-secondary-value', `registry-insert',
;; `registry-delete', `registry-prune', `registry-size' which see

;; and with the following properties:

;; Every piece of data has a unique ID and some general-purpose fields
;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.

;; ((F1 D1) (F2 D2) (F3 a b c))

;; Note that whether a field has one or many pieces of data, the data
;; is always a list of values.

60 61 62 63 64 65
;; The user decides which fields are "precious", F2 for example.  When
;; the registry is pruned, any entries without the F2 field will be
;; removed until the size is :max-size * :prune-factor _less_ than the
;; maximum database size. No entries with the F2 field will be removed
;; at PRUNE TIME, which means it may not be possible to prune back all
;; the way to the target size.
Ted Zlatanov's avatar
Ted Zlatanov committed
66

67 68
;; When an entry is inserted, the registry will reject new entries if
;; they bring it over the :max-size limit, even if they have the F2
Ted Zlatanov's avatar
Ted Zlatanov committed
69 70 71 72 73 74 75 76 77 78 79 80
;; field.

;; The user decides which fields are "tracked", F1 for example.  Any
;; new entry is then indexed by all the tracked fields so it can be
;; quickly looked up that way.  The data is always a list (see example
;; above) and each list element is indexed.

;; Precious and tracked field names must be symbols.  All other
;; fields can be any other Emacs Lisp types.

;;; Code:

81
(require 'cl-lib)
82 83
(require 'eieio)
(require 'eieio-base)
Ted Zlatanov's avatar
Ted Zlatanov committed
84

85 86 87 88 89 90 91 92 93
;; The version number needs to be kept outside of the class definition
;; itself.  The persistent-save process does *not* write to file any
;; slot values that are equal to the default :initform value.  If a
;; database object is at the most recent version, therefore, its
;; version number will not be written to file.  That makes it
;; difficult to know when a database needs to be upgraded.
(defvar registry-db-version 0.2
  "The current version of the registry format.")

Ted Zlatanov's avatar
Ted Zlatanov committed
94 95
(defclass registry-db (eieio-persistent)
  ((version :initarg :version
96 97
            :initform nil
            :type (or null float)
Ted Zlatanov's avatar
Ted Zlatanov committed
98
            :documentation "The registry version.")
99
   (max-size :initarg :max-size
100 101 102 103 104 105
	     ;; EIEIO's :initform is not 100% compatible with CLOS in
	     ;; that if the form is an atom, it assumes it's constant
	     ;; value rather than an expression, so in order to get the value
	     ;; of `most-positive-fixnum', we need to use an
	     ;; expression that's not just a symbol.
             :initform (symbol-value 'most-positive-fixnum)
Ted Zlatanov's avatar
Ted Zlatanov committed
106 107
             :type integer
             :custom integer
108
             :documentation "The maximum number of registry entries.")
109 110 111 112 113
   (prune-factor
    :initarg :prune-factor
    :initform 0.1
    :type float
    :custom float
114
    :documentation "Prune to (:max-size * :prune-factor) less
115
    than the :max-size limit.  Should be a float between 0 and 1.")
Ted Zlatanov's avatar
Ted Zlatanov committed
116 117 118 119 120 121 122 123 124 125
   (tracked :initarg :tracked
            :initform nil
            :type t
            :documentation "The tracked (indexed) fields, a list of symbols.")
   (precious :initarg :precious
             :initform nil
             :type t
             :documentation "The precious fields, a list of symbols.")
   (tracker :initarg :tracker
            :type hash-table
Paul Eggert's avatar
Paul Eggert committed
126
            :documentation "The field tracking hash table.")
Ted Zlatanov's avatar
Ted Zlatanov committed
127 128
   (data :initarg :data
         :type hash-table
Paul Eggert's avatar
Paul Eggert committed
129
         :documentation "The data hash table.")))
Ted Zlatanov's avatar
Ted Zlatanov committed
130

131
(cl-defmethod initialize-instance :before ((this registry-db) slots)
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
  "Check whether a registry object needs to be upgraded."
  ;; Hardcoded upgrade routines.  Version 0.1 to 0.2 requires the
  ;; :max-soft slot to disappear, and the :max-hard slot to be renamed
  ;; :max-size.
  (let ((current-version
	 (and (plist-member slots :version)
	      (plist-get slots :version))))
    (when (or (null current-version)
	      (eql current-version 0.1))
      (setq slots
	    (plist-put slots :max-size (plist-get slots :max-hard)))
      (setq slots
	    (plist-put slots :version registry-db-version))
      (cl-remf slots :max-hard)
      (cl-remf slots :max-soft))))

148
(cl-defmethod initialize-instance :after ((this registry-db) slots)
149 150 151 152 153 154 155 156
  "Set value of data slot of THIS after initialization."
  (with-slots (data tracker) this
    (unless (member :data slots)
      (setq data
	    (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
    (unless (member :tracker slots)
      (setq tracker (make-hash-table :size 100 :rehash-size 2.0)))))

157
(cl-defmethod registry-lookup ((db registry-db) keys)
158
  "Search for KEYS in the registry-db DB.
Juanma Barranquero's avatar
Juanma Barranquero committed
159
Returns an alist of the key followed by the entry in a list, not a cons cell."
160
  (let ((data (oref db data)))
161 162 163 164 165 166 167
    (delq nil
	  (mapcar
	   (lambda (k)
	     (when (gethash k data)
	       (list k (gethash k data))))
	   keys))))

168
(cl-defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
169
  "Search for KEYS in the registry-db DB.
Juanma Barranquero's avatar
Juanma Barranquero committed
170
Returns an alist of the key followed by the entry in a list, not a cons cell."
171
  (let ((data (oref db data)))
172
    (delq nil
173 174 175
	  (cl-loop for key in keys
                   when (gethash key data)
                   collect (list key (gethash key data))))))
176

177 178
(cl-defmethod registry-lookup-secondary ((db registry-db) tracksym
					 &optional create)
179
  "Search for TRACKSYM in the registry-db DB.
Paul Eggert's avatar
Paul Eggert committed
180
When CREATE is not nil, create the secondary index hash table if needed."
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
181
  (let ((h (gethash tracksym (oref db tracker))))
182 183 184 185 186
    (if h
	h
      (when create
	(puthash tracksym
		 (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
187 188
		 (oref db tracker))
	(gethash tracksym (oref db tracker))))))
189

190 191
(cl-defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
					       &optional set)
192
  "Search for TRACKSYM with value VAL in the registry-db DB.
Ted Zlatanov's avatar
Ted Zlatanov committed
193
When SET is not nil, set it for VAL (use t for an empty list)."
194 195 196 197 198 199 200
  ;; either we're asked for creation or there should be an existing index
  (when (or set (registry-lookup-secondary db tracksym))
    ;; set the entry if requested,
    (when set
      (puthash val (if (eq t set) '() set)
	       (registry-lookup-secondary db tracksym t)))
    (gethash val (registry-lookup-secondary db tracksym))))
Ted Zlatanov's avatar
Ted Zlatanov committed
201 202 203 204 205 206 207 208

(defun registry--match (mode entry check-list)
  ;; for all members
  (when check-list
    (let ((key (nth 0 (nth 0 check-list)))
          (vals (cdr-safe (nth 0 check-list)))
          found)
      (while (and key vals (not found))
209
        (setq found (cl-case mode
Ted Zlatanov's avatar
Ted Zlatanov committed
210 211 212 213 214 215 216 217 218 219 220 221
                      (:member
                       (member (car-safe vals) (cdr-safe (assoc key entry))))
                      (:regex
                       (string-match (car vals)
                                     (mapconcat
                                      'prin1-to-string
                                      (cdr-safe (assoc key entry))
                                      "\0"))))
              vals (cdr-safe vals)))
      (or found
          (registry--match mode entry (cdr-safe check-list))))))

222
(cl-defmethod registry-search ((db registry-db) &rest spec)
223
  "Search for SPEC across the registry-db DB.
224 225 226
For example calling with `:member \\='(a 1 2)' will match entry \((a 3 1)).
Calling with `:all t' (any non-nil value) will match all.
Calling with `:regex \\='(a \"h.llo\")' will match entry \(a \"hullo\" \"bye\").
Ted Zlatanov's avatar
Ted Zlatanov committed
227
The test order is to check :all first, then :member, then :regex."
228 229 230 231
  (when db
    (let ((all (plist-get spec :all))
	  (member (plist-get spec :member))
	  (regex (plist-get spec :regex)))
232 233 234 235 236 237 238 239 240 241
      (cl-loop for k being the hash-keys of (oref db data)
               using (hash-values v)
               when (or
                     ;; :all non-nil returns all
                     all
                     ;; member matching
                     (and member (registry--match :member v member))
                     ;; regex matching
                     (and regex (registry--match :regex v regex)))
               collect k))))
242

243
(cl-defmethod registry-delete ((db registry-db) keys assert &rest spec)
244
  "Delete KEYS from the registry-db DB.
Ted Zlatanov's avatar
Ted Zlatanov committed
245 246 247
If KEYS is nil, use SPEC to do a search.
Updates the secondary ('tracked') indices as well.
With assert non-nil, errors out if the key does not exist already."
248
  (let* ((data (oref db data))
249 250
	 (keys (or keys
		   (apply 'registry-search db spec)))
251
	 (tracked (oref db tracked)))
252 253 254 255

    (dolist (key keys)
      (let ((entry (gethash key data)))
	(when assert
256
	  (cl-assert entry nil "Key %s does not exist in database" key))
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
	;; clean entry from the secondary indices
	(dolist (tr tracked)
	  ;; is this tracked symbol indexed?
	  (when (registry-lookup-secondary db tr)
	    ;; for every value in the entry under that key...
	    (dolist (val (cdr-safe (assq tr entry)))
	      (let* ((value-keys (registry-lookup-secondary-value
				  db tr val)))
		(when (member key value-keys)
		  ;; override the previous value
		  (registry-lookup-secondary-value
		   db tr val
		   ;; with the indexed keys MINUS the current key
		   ;; (we pass t when the list is empty)
		   (or (delete key value-keys) t)))))))
	(remhash key data)))
    keys))

275
(cl-defmethod registry-size ((db registry-db))
276
  "Return the size of the registry-db object DB.
277 278
This is the key count of the `data' slot."
  (hash-table-count (oref db data)))
279

280
(cl-defmethod registry-full ((db registry-db))
281
  "Check if registry-db DB is full."
282
  (>= (registry-size db)
283
      (oref db max-size)))
284

285
(cl-defmethod registry-insert ((db registry-db) key entry)
286
  "Insert ENTRY under KEY into the registry-db DB.
Ted Zlatanov's avatar
Ted Zlatanov committed
287 288
Updates the secondary ('tracked') indices as well.
Errors out if the key exists already."
289 290 291 292
  (cl-assert (not (gethash key (oref db data))) nil
             "Key already exists in database")
  (cl-assert (not (registry-full db)) nil
             "registry max-size limit reached")
293 294

  ;; store the entry
295
  (puthash key entry (oref db data))
296 297

  ;; store the secondary indices
298
  (dolist (tr (oref db tracked))
299 300 301
    ;; for every value in the entry under that key...
    (dolist (val (cdr-safe (assq tr entry)))
      (let* ((value-keys (registry-lookup-secondary-value db tr val)))
302
	(cl-pushnew key value-keys :test 'equal)
303 304 305
	(registry-lookup-secondary-value db tr val value-keys))))
  entry)

306
(cl-defmethod registry-reindex ((db registry-db))
307
  "Rebuild the secondary indices of registry-db DB."
308
  (let ((count 0)
309 310
	(expected (* (length (oref db tracked)) (registry-size db))))
    (dolist (tr (oref db tracked))
311 312 313
      (let (values)
	(maphash
	 (lambda (key v)
314
	   (cl-incf count)
315 316 317
	   (when (and (< 0 expected)
		      (= 0 (mod count 1000)))
	     (message "reindexing: %d of %d (%.2f%%)"
318
		      count expected (/ (* 100.0 count) expected)))
319
	   (dolist (val (cdr-safe (assq tr v)))
320
	     (let ((value-keys (registry-lookup-secondary-value db tr val)))
321 322
	       (push key value-keys)
	       (registry-lookup-secondary-value db tr val value-keys))))
323
	 (oref db data))))))
324

325
(cl-defmethod registry-prune ((db registry-db) &optional sortfunc)
326
  "Prune the registry-db object DB.
327 328

Attempts to prune the number of entries down to \(*
329
:max-size :prune-factor) less than the max-size limit, so
330 331 332 333 334 335 336 337 338
pruning doesn't need to happen on every save. Removes only
entries without the :precious keys, so it may not be possible to
reach the target limit.

Entries to be pruned are first sorted using SORTFUNC.  Entries
from the front of the list are deleted first.

Returns the number of deleted entries."
  (let ((size (registry-size db))
339 340 341 342
	(target-size
	 (floor (- (oref db max-size)
		   (* (oref db max-size)
		      (oref db prune-factor)))))
343
	candidates)
344
    (if (registry-full db)
345 346 347 348 349 350 351
	(progn
	  (setq candidates
		(registry-collect-prune-candidates
		 db (- size target-size) sortfunc))
	  (length (registry-delete db candidates nil)))
      0)))

352 353
(cl-defmethod registry-collect-prune-candidates ((db registry-db)
						 limit sortfunc)
354
  "Collect pruning candidates from the registry-db object DB.
355 356 357 358

Proposes only entries without the :precious keys, and attempts to
return LIMIT such candidates.  If SORTFUNC is provided, sort
entries first and return candidates from beginning of list."
359
  (let* ((precious (oref db precious))
360
	 (precious-p (lambda (entry-key)
Glenn Morris's avatar
Glenn Morris committed
361
		       (memq (car-safe entry-key) precious)))
362
	 (data (oref db data))
363 364
	 (candidates (cl-loop for k being the hash-keys of data
			      using (hash-values v)
365 366
			      when (and (listp v)
                                        (cl-notany precious-p v))
367 368 369 370 371
			      collect (cons k v))))
    ;; We want the full entries for sorting, but should only return a
    ;; list of entry keys.
    (when sortfunc
      (setq candidates (sort candidates sortfunc)))
372
    (cl-subseq (mapcar #'car candidates) 0 (min limit (length candidates)))))
Ted Zlatanov's avatar
Ted Zlatanov committed
373 374 375

(provide 'registry)
;;; registry.el ends here