iso8601.el 14.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
;;; iso8601.el --- parse ISO 8601 date/time strings  -*- lexical-binding:t -*-

;; Copyright (C) 2019 Free Software Foundation, Inc.

;; Keywords: dates

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
;; ISO8601 times basically look like 1985-04-01T15:23:49...  Or so
;; you'd think.  This is what everybody means when they say "ISO8601",
;; but it's in reality a quite large collection of syntaxes, including
;; week numbers, ordinal dates, durations and intervals.  This package
;; has functions for parsing them all.
;;
;; The interface functions are `iso8601-parse', `iso8601-parse-date',
;; `iso8601-parse-time', `iso8601-parse-zone',
;; `iso8601-parse-duration' and `iso8601-parse-interval'.  They all
;; return decoded time objects, except the last one, which returns a
;; list of three of them.
;;
;; (iso8601-parse-interval "P1Y2M10DT2H30M/2008W32T153000-01")
;; '((0 0 13 24 5 2007 nil nil -3600)
;;   (0 30 15 3 8 2008 nil nil -3600)
;;   (0 30 2 10 2 1 nil nil nil))
;;
;;
;; The standard can be found at:
;;
44
;; http://www.loc.gov/standards/datetime/iso-tc154-wg5_n0038_iso_wd_8601-1_2016-02-16.pdf
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
45
;;
46
;; The Wikipedia page on the standard is also informative:
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
47
;;
48
;; https://en.wikipedia.org/wiki/ISO_8601
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
49 50
;;
;; RFC3339 defines the subset that everybody thinks of as "ISO8601".
51

52 53
;;; Code:

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
54
(require 'time-date)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
55
(require 'cl-lib)
56

57 58 59 60 61 62 63
(defun iso8601--concat-regexps (regexps)
  (mapconcat (lambda (regexp)
               (concat "\\(?:"
                       (replace-regexp-in-string "(" "(?:" regexp)
                       "\\)"))
             regexps "\\|"))

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
64 65 66
(defconst iso8601--year-match
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)")
(defconst iso8601--full-date-match
67
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
68
(defconst iso8601--without-day-match
69
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
70 71 72
(defconst iso8601--outdated-date-match
  "--\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
(defconst iso8601--week-date-match
73
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?W\\([0-9][0-9]\\)-?\\([0-9]\\)?")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
74
(defconst iso8601--ordinal-date-match
75
  "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9][0-9]\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
76
(defconst iso8601--date-match
77 78
  (iso8601--concat-regexps
   (list iso8601--year-match
79 80 81 82 83
         iso8601--full-date-match
         iso8601--without-day-match
         iso8601--outdated-date-match
         iso8601--week-date-match
         iso8601--ordinal-date-match)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
84 85 86 87 88

(defconst iso8601--time-match
  "\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?:?\\([0-9][0-9]\\)?\\.?\\([0-9][0-9][0-9]\\)?")

(defconst iso8601--zone-match
89
  "\\(Z\\|\\([-+]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?\\)")
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
90

91 92 93 94
(defconst iso8601--full-time-match
  (concat "\\(" (replace-regexp-in-string "(" "(?:" iso8601--time-match) "\\)"
          "\\(" iso8601--zone-match "\\)?"))

95 96 97 98
(defconst iso8601--combined-match
  (concat "\\(" iso8601--date-match "\\)"
          "\\(?:T\\("
          (replace-regexp-in-string "(" "(?:" iso8601--time-match)
99 100
          "\\)"
          "\\(" iso8601--zone-match "\\)?\\)?"))
101 102 103 104 105 106 107 108 109 110 111 112 113

(defconst iso8601--duration-full-match
  "P\\([0-9]+Y\\)?\\([0-9]+M\\)?\\([0-9]+D\\)?\\(T\\([0-9]+H\\)?\\([0-9]+M\\)?\\([0-9]+S\\)?\\)?")
(defconst iso8601--duration-week-match
  "P\\([0-9]+\\)W")
(defconst iso8601--duration-combined-match
  (concat "P" iso8601--combined-match))
(defconst iso8601--duration-match
  (iso8601--concat-regexps
   (list iso8601--duration-full-match
         iso8601--duration-week-match
         iso8601--duration-combined-match)))

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
114 115 116 117 118 119 120 121 122 123 124 125 126
(defun iso8601-parse (string)
  "Parse an ISO 8601 date/time string and return a `decoded-time' structure.

The ISO 8601 date/time strings look like \"2008-03-02T13:47:30\",
but shorter, incomplete strings like \"2008-03-02\" are valid, as
well as variants like \"2008W32\" (week number) and
\"2008-234\" (ordinal day number)."
  (if (not (iso8601-valid-p string))
      (signal 'wrong-type-argument string)
    (let* ((date-string (match-string 1 string))
           (time-string (match-string 2 string))
           (zone-string (match-string 3 string))
           (date (iso8601-parse-date date-string)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
127
      ;; The time portion is optional.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
128 129 130 131 132
      (when time-string
        (let ((time (iso8601-parse-time time-string)))
          (setf (decoded-time-hour date) (decoded-time-hour time))
          (setf (decoded-time-minute date) (decoded-time-minute time))
          (setf (decoded-time-second date) (decoded-time-second time))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
133
      ;; The time zone is optional.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
134 135 136 137 138 139
      (when zone-string
        (setf (decoded-time-zone date)
              ;; The time zone in decoded times are in seconds.
              (* (iso8601-parse-zone zone-string) 60)))
      date)))

140 141 142 143
(defun iso8601-parse-date (string)
  "Parse STRING (which should be on ISO 8601 format) and return a time value."
  (cond
   ;; Just a year: [-+]YYYY.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
144
   ((iso8601--match iso8601--year-match string)
145 146 147
    (iso8601--decoded-time
     :year (iso8601--adjust-year (match-string 1 string)
                                 (match-string 2 string))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
148 149
   ;; Calendar dates: YYYY-MM-DD and variants.
   ((iso8601--match iso8601--full-date-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
150
    (iso8601--decoded-time
151 152 153 154
     :year (iso8601--adjust-year (match-string 1 string)
                                 (match-string 2 string))
     :month (match-string 3 string)
     :day (match-string 4 string)))
155
   ;; Calendar date without day: YYYY-MM.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
156
   ((iso8601--match iso8601--without-day-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
157
    (iso8601--decoded-time
158 159 160
     :year (iso8601--adjust-year (match-string 1 string)
                                 (match-string 2 string))
     :month (match-string 3 string)))
161
   ;; Outdated date without year: --MM-DD
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
162
   ((iso8601--match iso8601--outdated-date-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
163 164 165
    (iso8601--decoded-time
     :month (match-string 1 string)
     :day (match-string 2 string)))
166
   ;; Week dates: YYYY-Www-D
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
167
   ((iso8601--match iso8601--week-date-match string)
168 169 170 171 172
    (let* ((year (iso8601--adjust-year (match-string 1 string)
                                       (match-string 2 string)))
           (week (string-to-number (match-string 3 string)))
           (day-of-week (and (match-string 4 string)
                             (string-to-number (match-string 4 string))))
173 174
           (jan-start (decoded-time-weekday
                       (decode-time
175
                        (iso8601--encode-time
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
176 177 178
                         (iso8601--decoded-time :year year
                                                :month 1
                                                :day 4)))))
179 180 181 182 183 184 185
           (correction (+ (if (zerop jan-start) 7 jan-start)
                          3))
           (ordinal (+ (* week 7) (or day-of-week 0) (- correction))))
      (cond
       ;; Monday 29 December 2008 is written "2009-W01-1".
       ((< ordinal 1)
        (setq year (1- year)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
186 187
              ordinal (+ ordinal (if (date-leap-year-p year)
                                     366 365))))
188
       ;; Sunday 3 January 2010 is written "2009-W53-7".
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
189 190 191 192
       ((> ordinal (if (date-leap-year-p year)
                       366 365))
        (setq ordinal (- ordinal (if (date-leap-year-p year)
                                     366 365))
193
              year (1+ year))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
194
      (let ((month-day (date-ordinal-to-time year ordinal)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
195 196 197
        (iso8601--decoded-time :year year
                               :month (decoded-time-month month-day)
                               :day (decoded-time-day month-day)))))
198
   ;; Ordinal dates: YYYY-DDD
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
199
   ((iso8601--match iso8601--ordinal-date-match string)
200 201 202
    (let* ((year (iso8601--adjust-year (match-string 1 string)
                                       (match-string 2 string)))
           (ordinal (string-to-number (match-string 3 string)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
203
           (month-day (date-ordinal-to-time year ordinal)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
204 205
      (iso8601--decoded-time :year year
                             :month (decoded-time-month month-day)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
206 207 208
                             :day (decoded-time-day month-day))))
   (t
    (signal 'wrong-type-argument string))))
209

210 211 212 213 214 215 216 217 218 219
(defun iso8601--adjust-year (sign year)
  (save-match-data
    (let ((year (if (stringp year)
                    (string-to-number year)
                  year)))
      (if (string= sign "-")
          ;; -0001 is 2 BCE.
          (1- (- year))
        year))))

220 221
(defun iso8601-parse-time (string)
  "Parse STRING, which should be an ISO 8601 time string, and return a time value."
222
  (if (not (iso8601--match iso8601--full-time-match string))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
223
      (signal 'wrong-type-argument string)
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
    (let ((time (match-string 1 string))
          (zone (match-string 2 string)))
      (if (not (iso8601--match iso8601--time-match time))
          (signal 'wrong-type-argument string)
        (let ((hour (string-to-number (match-string 1 time)))
              (minute (and (match-string 2 time)
                           (string-to-number (match-string 2 time))))
              (second (and (match-string 3 time)
                           (string-to-number (match-string 3 time))))
              ;; Hm...
              (_millisecond (and (match-string 4 time)
                                 (string-to-number (match-string 4 time)))))
          (iso8601--decoded-time :hour hour
                                 :minute (or minute 0)
                                 :second (or second 0)
                                 :zone (and zone
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
240 241
                                            (* 60 (iso8601-parse-zone
                                                   zone)))))))))
242 243 244 245

(defun iso8601-parse-zone (string)
  "Parse STRING, which should be an ISO 8601 time zone.
Return the number of minutes."
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
246 247
  (if (not (iso8601--match iso8601--zone-match string))
      (signal 'wrong-type-argument string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
248
    (if (match-string 2 string)
249 250 251
        ;; HH:MM-ish.
        (let ((hour (string-to-number (match-string 3 string)))
              (minute (and (match-string 4 string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
252 253 254 255 256
                           (string-to-number (match-string 4 string)))))
          (* (if (equal (match-string 2 string) "-")
                 -1
               1)
             (+ (* hour 60)
257 258 259 260
                (or minute 0))))
      ;; "Z".
      0)))

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
261 262
(defun iso8601-valid-p (string)
  "Say whether STRING is a valid ISO 8601 representation."
263
  (iso8601--match iso8601--combined-match string))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
264

265 266 267
(defun iso8601-parse-duration (string)
  "Parse ISO 8601 durations on the form P3Y6M4DT12H30M5S."
  (cond
268 269 270
   ((and (iso8601--match iso8601--duration-full-match string)
         ;; Just a "P" isn't valid; there has to be at least one
         ;; element, like P1M.
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
271 272 273 274 275 276 277
         (> (length (match-string 0 string)) 2))
    (iso8601--decoded-time :year (or (match-string 1 string) 0)
                           :month (or (match-string 2 string) 0)
                           :day (or (match-string 3 string) 0)
                           :hour (or (match-string 5 string) 0)
                           :minute (or (match-string 6 string) 0)
                           :second (or (match-string 7 string) 0)))
278
   ;; PnW: Weeks.
279
   ((iso8601--match iso8601--duration-week-match string)
280 281
    (let ((weeks (string-to-number (match-string 1 string))))
      ;; Does this make sense?  Hm...
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
282
      (iso8601--decoded-time :day (* weeks 7))))
283
   ;; P<date>T<time>
284
   ((iso8601--match iso8601--duration-combined-match string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
285 286 287
    (iso8601-parse (substring string 1)))
   (t
    (signal 'wrong-type-argument string))))
288 289 290 291 292

(defun iso8601-parse-interval (string)
  "Parse ISO 8601 intervals."
  (let ((bits (split-string string "/"))
        start end duration)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
293 294
    (if (not (= (length bits) 2))
        (signal 'wrong-type-argument string)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
295 296
      ;; The intervals may be an explicit start/end times, or either a
      ;; start or an end, and an accompanying duration.
297
      (cond
298 299
       ((and (string-match "\\`P" (car bits))
             (iso8601-valid-p (cadr bits)))
300
        (setq duration (iso8601-parse-duration (car bits))
301 302 303
              end (iso8601-parse (cadr bits))))
       ((and (string-match "\\`P" (cadr bits))
             (iso8601-valid-p (car bits)))
304
        (setq duration (iso8601-parse-duration (cadr bits))
305 306 307
              start (iso8601-parse (car bits))))
       ((and (iso8601-valid-p (car bits))
             (iso8601-valid-p (cadr bits)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
308 309
        (setq start (iso8601-parse (car bits))
              end (iso8601-parse (cadr bits))))
310 311
       (t
        (signal 'wrong-type-argument string))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
312
    (unless end
313
      (setq end (decoded-time-add start duration)))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
314
    (unless start
315 316 317 318 319 320 321 322
      (setq start (decoded-time-add end
                                    ;; We negate the duration so that
                                    ;; we get a subtraction.
                                    (mapcar (lambda (elem)
                                              (if (numberp elem)
                                                  (- elem)
                                                elem))
                                            duration))))
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
323 324
    (list start end
          (or duration
325 326
              (decode-time (time-subtract (iso8601--encode-time end)
                                          (iso8601--encode-time start))
327
                           (or (decoded-time-zone end) 0))))))
328

Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
329 330 331
(defun iso8601--match (regexp string)
  (string-match (concat "\\`" regexp "\\'") string))

332
(defun iso8601--value (elem &optional default)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
333 334 335 336
  (if (stringp elem)
      (string-to-number elem)
    (or elem default)))

337 338 339
(cl-defun iso8601--decoded-time (&key second minute hour
                                      day month year
                                      dst zone)
340 341 342 343 344 345
  (list (iso8601--value second)
        (iso8601--value minute)
        (iso8601--value hour)
        (iso8601--value day)
        (iso8601--value month)
        (iso8601--value year)
Lars Ingebrigtsen's avatar
Lars Ingebrigtsen committed
346 347 348 349
        nil
        dst
        zone))

350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
(defun iso8601--encode-time (time)
  "Like `encode-time', but fill in nil values in TIME."
  (setq time (copy-sequence time))
  (unless (decoded-time-second time)
    (setf (decoded-time-second time) 0))
  (unless (decoded-time-minute time)
    (setf (decoded-time-minute time) 0))
  (unless (decoded-time-hour time)
    (setf (decoded-time-hour time) 0))

  (unless (decoded-time-day time)
    (setf (decoded-time-day time) 1))
  (unless (decoded-time-month time)
    (setf (decoded-time-month time) 1))
  (unless (decoded-time-year time)
    (setf (decoded-time-year time) 0))
  (encode-time time))

368 369 370
(provide 'iso8601)

;;; iso8601.el ends here