iso8601.el 14.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
;;; iso8601.el --- parse ISO 8601 date/time strings  -*- lexical-binding:t -*-

;; Copyright (C) 2019 Free Software Foundation, Inc.

;; Keywords: dates

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
;; ISO8601 times basically look like 1985-04-01T15:23:49...  Or so
;; you'd think.  This is what everybody means when they say "ISO8601",
;; but it's in reality a quite large collection of syntaxes, including
;; week numbers, ordinal dates, durations and intervals.  This package
;; has functions for parsing them all.
;;
;; The interface functions are `iso8601-parse', `iso8601-parse-date',
;; `iso8601-parse-time', `iso8601-parse-zone',
;; `iso8601-parse-duration' and `iso8601-parse-interval'.  They all
;; return decoded time objects, except the last one, which returns a
;; list of three of them.
;;
;; (iso8601-parse-interval "P1Y2M10DT2H30M/2008W32T153000-01")
;; '((0 0 13 24 5 2007 nil nil -3600)
;;   (0 30 15 3 8 2008 nil nil -3600)
;;   (0 30 2 10 2 1 nil nil nil))
;;
;;
;; The standard can be found at:
;;
44
;; http://www.loc.gov/standards/datetime/iso-tc154-wg5_n0038_iso_wd_8601-1_2016-02-16.pdf
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
45
;;
46
;; The Wikipedia page on the standard is also informative:
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
47
;;
48
;; https://en.wikipedia.org/wiki/ISO_8601
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
49 50
;;
;; RFC3339 defines the subset that everybody thinks of as "ISO8601".
51

52 53
;;; Code:

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
54
(require 'time-date)
55
(require 'cl-lib)
56

57 58 59 60 61 62 63
(defun iso8601--concat-regexps (regexps)
  (mapconcat (lambda (regexp)
               (concat "\\(?:"
                       (replace-regexp-in-string "(" "(?:" regexp)
                       "\\)"))
             regexps "\\|"))

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
64 65 66
(defconst iso8601--year-match
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)")
(defconst iso8601--full-date-match
67
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
68
(defconst iso8601--without-day-match
69
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
70 71 72
(defconst iso8601--outdated-date-match
  "--\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
(defconst iso8601--week-date-match
73
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?W\\([0-9][0-9]\\)-?\\([0-9]\\)?")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
74
(defconst iso8601--ordinal-date-match
75
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9][0-9]\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
76
(defconst iso8601--date-match
77 78
  (iso8601--concat-regexps
   (list iso8601--year-match
79 80 81 82 83
         iso8601--full-date-match
         iso8601--without-day-match
         iso8601--outdated-date-match
         iso8601--week-date-match
         iso8601--ordinal-date-match)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
84 85 86 87 88

(defconst iso8601--time-match
  "\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?:?\\([0-9][0-9]\\)?\\.?\\([0-9][0-9][0-9]\\)?")

(defconst iso8601--zone-match
89
  "\\(Z\\|\\([-+]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
90

91 92 93 94
(defconst iso8601--full-time-match
  (concat "\\(" (replace-regexp-in-string "(" "(?:" iso8601--time-match) "\\)"
          "\\(" iso8601--zone-match "\\)?"))

95 96 97 98
(defconst iso8601--combined-match
  (concat "\\(" iso8601--date-match "\\)"
          "\\(?:T\\("
          (replace-regexp-in-string "(" "(?:" iso8601--time-match)
99 100
          "\\)"
          "\\(" iso8601--zone-match "\\)?\\)?"))
101 102 103 104 105 106 107 108 109 110 111 112 113

(defconst iso8601--duration-full-match
  "P\\([0-9]+Y\\)?\\([0-9]+M\\)?\\([0-9]+D\\)?\\(T\\([0-9]+H\\)?\\([0-9]+M\\)?\\([0-9]+S\\)?\\)?")
(defconst iso8601--duration-week-match
  "P\\([0-9]+\\)W")
(defconst iso8601--duration-combined-match
  (concat "P" iso8601--combined-match))
(defconst iso8601--duration-match
  (iso8601--concat-regexps
   (list iso8601--duration-full-match
         iso8601--duration-week-match
         iso8601--duration-combined-match)))

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
114 115 116 117 118 119 120 121 122 123 124 125 126
(defun iso8601-parse (string)
  "Parse an ISO 8601 date/time string and return a `decoded-time' structure.

The ISO 8601 date/time strings look like \"2008-03-02T13:47:30\",
but shorter, incomplete strings like \"2008-03-02\" are valid, as
well as variants like \"2008W32\" (week number) and
\"2008-234\" (ordinal day number)."
  (if (not (iso8601-valid-p string))
      (signal 'wrong-type-argument string)
    (let* ((date-string (match-string 1 string))
           (time-string (match-string 2 string))
           (zone-string (match-string 3 string))
           (date (iso8601-parse-date date-string)))
127
      ;; The time portion is optional.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
128 129 130 131 132
      (when time-string
        (let ((time (iso8601-parse-time time-string)))
          (setf (decoded-time-hour date) (decoded-time-hour time))
          (setf (decoded-time-minute date) (decoded-time-minute time))
          (setf (decoded-time-second date) (decoded-time-second time))))
133
      ;; The time zone is optional.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
134 135 136 137 138 139
      (when zone-string
        (setf (decoded-time-zone date)
              ;; The time zone in decoded times are in seconds.
              (* (iso8601-parse-zone zone-string) 60)))
      date)))

140 141 142 143
(defun iso8601-parse-date (string)
  "Parse STRING (which should be on ISO 8601 format) and return a time value."
  (cond
   ;; Just a year: [-+]YYYY.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
144
   ((iso8601--match iso8601--year-match string)
145 146 147
    (iso8601--decoded-time
     :year (iso8601--adjust-year (match-string 1 string)
                                 (match-string 2 string))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
148 149
   ;; Calendar dates: YYYY-MM-DD and variants.
   ((iso8601--match iso8601--full-date-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
150
    (iso8601--decoded-time
151 152 153 154
     :year (iso8601--adjust-year (match-string 1 string)
                                 (match-string 2 string))
     :month (match-string 3 string)
     :day (match-string 4 string)))
155
   ;; Calendar date without day: YYYY-MM.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
156
   ((iso8601--match iso8601--without-day-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
157
    (iso8601--decoded-time
158 159 160
     :year (iso8601--adjust-year (match-string 1 string)
                                 (match-string 2 string))
     :month (match-string 3 string)))
161
   ;; Outdated date without year: --MM-DD
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
162
   ((iso8601--match iso8601--outdated-date-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
163 164 165
    (iso8601--decoded-time
     :month (match-string 1 string)
     :day (match-string 2 string)))
166
   ;; Week dates: YYYY-Www-D
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
167
   ((iso8601--match iso8601--week-date-match string)
168 169 170 171 172
    (let* ((year (iso8601--adjust-year (match-string 1 string)
                                       (match-string 2 string)))
           (week (string-to-number (match-string 3 string)))
           (day-of-week (and (match-string 4 string)
                             (string-to-number (match-string 4 string))))
173 174
           (jan-start (decoded-time-weekday
                       (decode-time
175
                        (iso8601--encode-time
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
176 177 178
                         (iso8601--decoded-time :year year
                                                :month 1
                                                :day 4)))))
179 180 181 182 183 184 185
           (correction (+ (if (zerop jan-start) 7 jan-start)
                          3))
           (ordinal (+ (* week 7) (or day-of-week 0) (- correction))))
      (cond
       ;; Monday 29 December 2008 is written "2009-W01-1".
       ((< ordinal 1)
        (setq year (1- year)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
186 187
              ordinal (+ ordinal (if (date-leap-year-p year)
                                     366 365))))
188
       ;; Sunday 3 January 2010 is written "2009-W53-7".
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
189 190 191 192
       ((> ordinal (if (date-leap-year-p year)
                       366 365))
        (setq ordinal (- ordinal (if (date-leap-year-p year)
                                     366 365))
193
              year (1+ year))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
194
      (let ((month-day (date-ordinal-to-time year ordinal)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
195 196 197
        (iso8601--decoded-time :year year
                               :month (decoded-time-month month-day)
                               :day (decoded-time-day month-day)))))
198
   ;; Ordinal dates: YYYY-DDD
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
199
   ((iso8601--match iso8601--ordinal-date-match string)
200 201 202
    (let* ((year (iso8601--adjust-year (match-string 1 string)
                                       (match-string 2 string)))
           (ordinal (string-to-number (match-string 3 string)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
203
           (month-day (date-ordinal-to-time year ordinal)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
204 205
      (iso8601--decoded-time :year year
                             :month (decoded-time-month month-day)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
206 207 208
                             :day (decoded-time-day month-day))))
   (t
    (signal 'wrong-type-argument string))))
209

210 211 212 213 214 215 216 217 218 219
(defun iso8601--adjust-year (sign year)
  (save-match-data
    (let ((year (if (stringp year)
                    (string-to-number year)
                  year)))
      (if (string= sign "-")
          ;; -0001 is 2 BCE.
          (1- (- year))
        year))))

220 221
(defun iso8601-parse-time (string)
  "Parse STRING, which should be an ISO 8601 time string, and return a time value."
222
  (if (not (iso8601--match iso8601--full-time-match string))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
223
      (signal 'wrong-type-argument string)
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
    (let ((time (match-string 1 string))
          (zone (match-string 2 string)))
      (if (not (iso8601--match iso8601--time-match time))
          (signal 'wrong-type-argument string)
        (let ((hour (string-to-number (match-string 1 time)))
              (minute (and (match-string 2 time)
                           (string-to-number (match-string 2 time))))
              (second (and (match-string 3 time)
                           (string-to-number (match-string 3 time))))
              ;; Hm...
              (_millisecond (and (match-string 4 time)
                                 (string-to-number (match-string 4 time)))))
          (iso8601--decoded-time :hour hour
                                 :minute (or minute 0)
                                 :second (or second 0)
                                 :zone (and zone
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
240 241
                                            (* 60 (iso8601-parse-zone
                                                   zone)))))))))
242 243 244 245

(defun iso8601-parse-zone (string)
  "Parse STRING, which should be an ISO 8601 time zone.
Return the number of minutes."
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
246 247
  (if (not (iso8601--match iso8601--zone-match string))
      (signal 'wrong-type-argument string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
248
    (if (match-string 2 string)
249 250 251
        ;; HH:MM-ish.
        (let ((hour (string-to-number (match-string 3 string)))
              (minute (and (match-string 4 string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
252 253 254 255 256
                           (string-to-number (match-string 4 string)))))
          (* (if (equal (match-string 2 string) "-")
                 -1
               1)
             (+ (* hour 60)
257 258 259 260
                (or minute 0))))
      ;; "Z".
      0)))

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
261 262
(defun iso8601-valid-p (string)
  "Say whether STRING is a valid ISO 8601 representation."
263
  (iso8601--match iso8601--combined-match string))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
264

265 266 267
(defun iso8601-parse-duration (string)
  "Parse ISO 8601 durations on the form P3Y6M4DT12H30M5S."
  (cond
268 269 270
   ((and (iso8601--match iso8601--duration-full-match string)
         ;; Just a "P" isn't valid; there has to be at least one
         ;; element, like P1M.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
271 272 273 274 275 276 277
         (> (length (match-string 0 string)) 2))
    (iso8601--decoded-time :year (or (match-string 1 string) 0)
                           :month (or (match-string 2 string) 0)
                           :day (or (match-string 3 string) 0)
                           :hour (or (match-string 5 string) 0)
                           :minute (or (match-string 6 string) 0)
                           :second (or (match-string 7 string) 0)))
278
   ;; PnW: Weeks.
279
   ((iso8601--match iso8601--duration-week-match string)
280 281
    (let ((weeks (string-to-number (match-string 1 string))))
      ;; Does this make sense?  Hm...
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
282
      (iso8601--decoded-time :day (* weeks 7))))
283
   ;; P<date>T<time>
284
   ((iso8601--match iso8601--duration-combined-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
285 286 287
    (iso8601-parse (substring string 1)))
   (t
    (signal 'wrong-type-argument string))))
288 289 290 291 292

(defun iso8601-parse-interval (string)
  "Parse ISO 8601 intervals."
  (let ((bits (split-string string "/"))
        start end duration)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
293 294
    (if (not (= (length bits) 2))
        (signal 'wrong-type-argument string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
295 296
      ;; The intervals may be an explicit start/end times, or either a
      ;; start or an end, and an accompanying duration.
297
      (cond
298 299
       ((and (string-match "\\`P" (car bits))
             (iso8601-valid-p (cadr bits)))
300
        (setq duration (iso8601-parse-duration (car bits))
301 302 303
              end (iso8601-parse (cadr bits))))
       ((and (string-match "\\`P" (cadr bits))
             (iso8601-valid-p (car bits)))
304
        (setq duration (iso8601-parse-duration (cadr bits))
305 306 307
              start (iso8601-parse (car bits))))
       ((and (iso8601-valid-p (car bits))
             (iso8601-valid-p (cadr bits)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
308 309
        (setq start (iso8601-parse (car bits))
              end (iso8601-parse (cadr bits))))
310 311
       (t
        (signal 'wrong-type-argument string))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
312
    (unless end
313
      (setq end (decoded-time-add start duration)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
314
    (unless start
315 316 317 318 319 320 321 322
      (setq start (decoded-time-add end
                                    ;; We negate the duration so that
                                    ;; we get a subtraction.
                                    (mapcar (lambda (elem)
                                              (if (numberp elem)
                                                  (- elem)
                                                elem))
                                            duration))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
323 324
    (list start end
          (or duration
325 326
              (decode-time (time-subtract (iso8601--encode-time end)
                                          (iso8601--encode-time start))
327
                           (or (decoded-time-zone end) 0))))))
328

329 330 331
(defun iso8601--match (regexp string)
  (string-match (concat "\\`" regexp "\\'") string))

332
(defun iso8601--value (elem &optional default)
333 334 335 336
  (if (stringp elem)
      (string-to-number elem)
    (or elem default)))

337 338 339
(cl-defun iso8601--decoded-time (&key second minute hour
                                      day month year
                                      dst zone)
340 341 342 343 344 345
  (list (iso8601--value second)
        (iso8601--value minute)
        (iso8601--value hour)
        (iso8601--value day)
        (iso8601--value month)
        (iso8601--value year)
346 347 348 349
        nil
        dst
        zone))

350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
(defun iso8601--encode-time (time)
  "Like `encode-time', but fill in nil values in TIME."
  (setq time (copy-sequence time))
  (unless (decoded-time-second time)
    (setf (decoded-time-second time) 0))
  (unless (decoded-time-minute time)
    (setf (decoded-time-minute time) 0))
  (unless (decoded-time-hour time)
    (setf (decoded-time-hour time) 0))

  (unless (decoded-time-day time)
    (setf (decoded-time-day time) 1))
  (unless (decoded-time-month time)
    (setf (decoded-time-month time) 1))
  (unless (decoded-time-year time)
    (setf (decoded-time-year time) 0))
  (encode-time time))

368 369 370
(provide 'iso8601)

;;; iso8601.el ends here