Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
emacs
emacs
Commits
f3c3dee6
Commit
f3c3dee6
authored
Mar 28, 2004
by
Vinicius Jose Latorre
Browse files
Parser for EBNF used to specify XML (EBNFX)
parent
9d59cbb0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
673 additions
and
0 deletions
+673
-0
lisp/progmodes/ebnf-ebx.el
lisp/progmodes/ebnf-ebx.el
+673
-0
No files found.
lisp/progmodes/ebnf-ebx.el
0 → 100644
View file @
f3c3dee6
;;; ebnf-ebx.el --- parser for EBNF used to specify XML (EBNFX)
;; Copyright (C) 2004 Free Sofware Foundation, Inc.
;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
;; Time-stamp: <2004/03/22 08:53:21 vinicius>
;; Keywords: wp, ebnf, PostScript
;; Version: 1.0
;; This file is part of GNU Emacs.
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;; Commentary:
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;
;; This is part of ebnf2ps package.
;;
;; This package defines a parser for EBNF used to specify XML (EBNFX).
;;
;; See ebnf2ps.el for documentation.
;;
;;
;; EBNFX Syntax
;; ------------
;;
;; See the URL:
;; `http://www.w3.org/TR/2004/REC-xml-20040204/#sec-notation'
;; (Extensible Markup Language (XML) 1.0 (Third Edition))
;;
;;
;; rule ::= symbol '::=' expression
;; /* rules are separated by at least one blank line. */
;;
;; expression ::= concatenation ('|' concatenation)*
;;
;; concatenation ::= exception*
;;
;; exception ::= term ('-' term)?
;;
;; term ::= factor ('*' | '+' | '?')?
;;
;; factor ::= hex-char+
;; | '[' '^'? ( char ( '-' char )? )+ ']'
;; | '"' 'string' '"'
;; | "'" "string" "'"
;; | '(' expression ')'
;; | symbol
;;
;; symbol ::= 'upper or lower case letter'
;; ('upper or lower case letter' | '-' | '_')*
;; /* upper and lower 8-bit accentuated characters are included */
;;
;; hex-char ::= '#x' [0-9A-Fa-f]+
;;
;; char ::= hex-char | 'any character except control characters'
;; /* 8-bit accentuated characters are included */
;;
;; any-char ::= char | 'newline' | 'tab'
;;
;; ignore ::= '[' ('wfc' | 'WFC' | 'vc' | 'VC') ':' ( any-char - ']' )* ']'
;;
;; comment ::= '/*' ( any-char - '*/' ) '*/'
;;
;;
;; Below is the Notation section extracted from the URL cited above.
;;
;; 6 Notation
;;
;; The formal grammar of XML is given in this specification using a simple
;; Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines
;; one symbol, in the form
;;
;; symbol ::= expression
;;
;; Symbols are written with an initial capital letter if they are the start
;; symbol of a regular language, otherwise with an initial lowercase letter.
;; Literal strings are quoted.
;;
;; Within the expression on the right-hand side of a rule, the following
;; expressions are used to match strings of one or more characters:
;;
;; #xN
;;
;; where N is a hexadecimal integer, the expression matches the character
;; whose number (code point) in ISO/IEC 10646 is N. The number of leading
;; zeros in the #xN form is insignificant.
;;
;; [a-zA-Z], [#xN-#xN]
;;
;; matches any Char with a value in the range(s) indicated (inclusive).
;;
;; [abc], [#xN#xN#xN]
;;
;; matches any Char with a value among the characters enumerated.
;; Enumerations and ranges can be mixed in one set of brackets.
;;
;; [^a-z], [^#xN-#xN]
;;
;; matches any Char with a value outside the range indicated.
;;
;; [^abc], [^#xN#xN#xN]
;;
;; matches any Char with a value not among the characters given.
;; Enumerations and ranges of forbidden values can be mixed in one set of
;; brackets.
;;
;; "string"
;;
;; matches a literal string matching that given inside the double quotes.
;;
;; 'string'
;;
;; matches a literal string matching that given inside the single quotes.
;;
;; These symbols may be combined to match more complex patterns as follows,
;; where A and B represent simple expressions:
;;
;; (expression)
;;
;; expression is treated as a unit and may be combined as described in this
;; list.
;;
;; A?
;;
;; matches A or nothing; optional A.
;;
;; A B
;;
;; matches A followed by B. This operator has higher precedence than
;; alternation; thus A B | C D is identical to (A B) | (C D).
;;
;; A | B
;;
;; matches A or B.
;;
;; A - B
;;
;; matches any string that matches A but does not match B.
;;
;; A+
;;
;; matches one or more occurrences of A. Concatenation has higher
;; precedence than alternation; thus A+ | B+ is identical to (A+) | (B+).
;;
;; A*
;;
;; matches zero or more occurrences of A. Concatenation has higher
;; precedence than alternation; thus A* | B* is identical to (A*) | (B*).
;;
;; Other notations used in the productions are:
;;
;; /* ... */
;;
;; comment.
;;
;; [ wfc: ... ]
;;
;; well-formedness constraint; this identifies by name a constraint on
;; well-formed documents associated with a production.
;;
;; [ vc: ... ]
;;
;; validity constraint; this identifies by name a constraint on valid
;; documents associated with a production.
;;
;;
;; Differences Between EBNFX And ebnf2ps EBNFX
;; -------------------------------------------
;;
;; Besides the characters that EBNFX accepts, ebnf2ps EBNFX accepts also the
;; underscore (_) and minus (-) for rule name and european 8-bit accentuated
;; characters (from \240 to \377) for rule name, string and comment. Also
;; rule name can start with upper case letter.
;;
;;
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Code:
(
require
'ebnf-otz
)
(
defvar
ebnf-ebx-lex
nil
"Value returned by `ebnf-ebx-lex' function."
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Syntactic analyzer
;;; rulelist ::= rule+
(
defun
ebnf-ebx-parser
(
start
)
"EBNFX parser."
(
let
((
total
(
+
(
-
ebnf-limit
start
)
1
))
(
bias
(
1-
start
))
(
origin
(
point
))
rule-list
token
rule
)
(
goto-char
start
)
(
setq
token
(
ebnf-ebx-lex
))
(
and
(
eq
token
'end-of-input
)
(
error
"Invalid EBNFX file format"
))
(
and
(
eq
token
'end-of-rule
)
(
setq
token
(
ebnf-ebx-lex
)))
(
while
(
not
(
eq
token
'end-of-input
))
(
ebnf-message-float
"Parsing...%s%%"
(
/
(
*
(
-
(
point
)
bias
)
100.0
)
total
))
(
setq
token
(
ebnf-ebx-rule
token
)
rule
(
cdr
token
)
token
(
car
token
))
(
or
(
ebnf-add-empty-rule-list
rule
)
(
setq
rule-list
(
cons
rule
rule-list
))))
(
goto-char
origin
)
rule-list
))
;;; rule ::= symbol '::=' expression
(
defun
ebnf-ebx-rule
(
token
)
(
let
((
name
ebnf-ebx-lex
)
(
action
ebnf-action
)
elements
)
(
setq
ebnf-action
nil
)
(
or
(
eq
token
'non-terminal
)
(
error
"Invalid rule name"
))
(
setq
token
(
ebnf-ebx-lex
))
(
or
(
eq
token
'production
)
(
error
"Invalid rule: missing `::='"
))
(
setq
elements
(
ebnf-ebx-expression
))
(
or
(
memq
(
car
elements
)
'
(
end-of-rule
end-of-input
))
(
error
"Invalid rule: there is no end of rule"
))
(
setq
elements
(
cdr
elements
))
(
ebnf-eps-add-production
name
)
(
cons
(
ebnf-ebx-lex
)
(
ebnf-make-production
name
elements
action
))))
;; expression ::= concatenation ('|' concatenation)*
(
defun
ebnf-ebx-expression
()
(
let
(
body
concatenation
)
(
while
(
eq
(
car
(
setq
concatenation
(
ebnf-ebx-concatenation
(
ebnf-ebx-lex
))))
'alternative
)
(
setq
body
(
cons
(
cdr
concatenation
)
body
)))
(
ebnf-token-alternative
body
concatenation
)))
;; concatenation ::= exception*
(
defun
ebnf-ebx-concatenation
(
token
)
(
let
((
term
(
ebnf-ebx-exception
token
))
seq
)
(
or
(
setq
token
(
car
term
)
term
(
cdr
term
))
(
error
"Empty element"
))
(
setq
seq
(
cons
term
seq
))
(
while
(
setq
term
(
ebnf-ebx-exception
token
)
token
(
car
term
)
term
(
cdr
term
))
(
setq
seq
(
cons
term
seq
)))
(
cons
token
(
if
(
=
(
length
seq
)
1
)
;; sequence with only one element
(
car
seq
)
;; a real sequence
(
ebnf-make-sequence
(
nreverse
seq
))))))
;;; exception ::= term ('-' term)?
(
defun
ebnf-ebx-exception
(
token
)
(
let
((
term
(
ebnf-ebx-term
token
)))
(
if
(
eq
(
car
term
)
'exception
)
(
let
((
except
(
ebnf-ebx-term
(
ebnf-ebx-lex
))))
(
cons
(
car
except
)
(
ebnf-make-except
(
cdr
term
)
(
cdr
except
))))
term
)))
;;; term ::= factor ('*' | '+' | '?')?
(
defun
ebnf-ebx-term
(
token
)
(
let
((
factor
(
ebnf-ebx-factor
token
)))
(
when
factor
(
setq
token
(
ebnf-ebx-lex
))
(
cond
((
eq
token
'zero-or-more
)
(
setq
factor
(
ebnf-make-zero-or-more
factor
)
token
(
ebnf-ebx-lex
)))
((
eq
token
'one-or-more
)
(
setq
factor
(
ebnf-make-one-or-more
factor
)
token
(
ebnf-ebx-lex
)))
((
eq
token
'optional
)
(
setq
factor
(
ebnf-token-optional
factor
)
token
(
ebnf-ebx-lex
)))))
(
cons
token
factor
)))
;;; factor ::= hex-char+
;;; | '[' '^'? ( char ( '-' char )? )+ ']'
;;; | '"' 'string' '"'
;;; | "'" "string" "'"
;;; | '(' expression ')'
;;; | symbol
;;;
;;; symbol ::= 'upper or lower case letter'
;;; ('upper or lower case letter' | '-' | '_')*
;;; /* upper and lower 8-bit accentuated characters are included */
;;;
;;; hex-char ::= '#x' [0-9A-Fa-f]+
;;;
;;; char ::= hex-char | 'any character except control characters'
;;; /* 8-bit accentuated characters are included */
;;;
;;; any-char ::= char | 'newline' | 'tab'
(
defun
ebnf-ebx-factor
(
token
)
(
cond
;; terminal
((
eq
token
'terminal
)
(
ebnf-make-terminal
ebnf-ebx-lex
))
;; non-terminal
((
eq
token
'non-terminal
)
(
ebnf-make-non-terminal
ebnf-ebx-lex
))
;; group
((
eq
token
'begin-group
)
(
let
((
body
(
ebnf-ebx-expression
)))
(
or
(
eq
(
car
body
)
'end-group
)
(
error
"Missing `)'"
))
(
cdr
body
)))
;; no element
(
t
nil
)
))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lexical analyzer
(
defconst
ebnf-ebx-token-table
(
make-vector
256
'error
)
"Vector used to map characters to a lexical token."
)
(
defun
ebnf-ebx-initialize
()
"Initialize EBNFX token table."
;; control character & control 8-bit character are set to `error'
(
let
((
char
?\101
))
;; printable character: A-Z
(
while
(
<
char
?\133
)
(
aset
ebnf-ebx-token-table
char
'non-terminal
)
(
setq
char
(
1+
char
)))
;; printable character: a-z
(
setq
char
?\141
)
(
while
(
<
char
?\173
)
(
aset
ebnf-ebx-token-table
char
'non-terminal
)
(
setq
char
(
1+
char
)))
;; European 8-bit accentuated characters:
(
setq
char
?\240
)
(
while
(
<
char
?\400
)
(
aset
ebnf-ebx-token-table
char
'non-terminal
)
(
setq
char
(
1+
char
)))
;; Override end of line characters:
(
aset
ebnf-ebx-token-table
?\n
'end-of-rule
)
; [NL] linefeed
(
aset
ebnf-ebx-token-table
?\r
'end-of-rule
)
; [CR] carriage return
;; Override space characters:
(
aset
ebnf-ebx-token-table
?\013
'space
)
; [VT] vertical tab
(
aset
ebnf-ebx-token-table
?\t
'space
)
; [HT] horizontal tab
(
aset
ebnf-ebx-token-table
?\
'space
)
; [SP] space
;; Override form feed character:
(
aset
ebnf-ebx-token-table
?\f
'form-feed
)
; [FF] form feed
;; Override other lexical characters:
(
aset
ebnf-ebx-token-table
?#
'hash
)
(
aset
ebnf-ebx-token-table
?\"
'double-quote
)
(
aset
ebnf-ebx-token-table
?\'
'single-quote
)
(
aset
ebnf-ebx-token-table
?\(
'begin-group
)
(
aset
ebnf-ebx-token-table
?\)
'end-group
)
(
aset
ebnf-ebx-token-table
?-
'exception
)
(
aset
ebnf-ebx-token-table
?:
'colon
)
(
aset
ebnf-ebx-token-table
?\[
'begin-square
)
(
aset
ebnf-ebx-token-table
?| 'alternative)
(aset ebnf-ebx-token-table ?* 'zero-or-more)
(aset ebnf-ebx-token-table ?+ 'one-or-more)
(aset ebnf-ebx-token-table ?\? 'optional)
;; Override comment character:
(aset ebnf-ebx-token-table ?/ 'comment)))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-ebx-non-terminal-chars
(ebnf-range-regexp "-_A-Za-z" ?\240 ?\377))
(defconst ebnf-ebx-non-terminal-letter-chars
(ebnf-range-regexp "A-Za-z" ?\240 ?\377))
(defun ebnf-ebx-lex ()
"Lexical analyser for EBNFX.
Return a lexical token.
See documentation for variable `ebnf-ebx-lex'."
(if (>= (point) ebnf-limit)
'end-of-input
(let (token)
;; skip spaces and comments
(while (if (> (following-char) 255)
(progn
(setq token 'error)
nil)
(setq token (aref ebnf-ebx-token-table (following-char)))
(cond
((eq token 'space)
(skip-chars-forward " \013\t" ebnf-limit)
(< (point) ebnf-limit))
((eq token 'comment)
(ebnf-ebx-skip-comment))
((eq token 'form-feed)
(forward-char)
(setq ebnf-action 'form-feed))
((eq token 'end-of-rule)
(ebnf-ebx-skip-end-of-rule))
((and (eq token 'begin-square)
(let ((case-fold-search t))
(looking-at "\\[\\(wfc\\|vc\\
)
:
")))
(ebnf-ebx-skip-constraint))
(t nil)
)))
(cond
;; end of input
((>= (point) ebnf-limit)
'end-of-input)
;; error
((eq token 'error)
(error "
Illegal
character
"))
;; end of rule
((eq token 'end-of-rule)
'end-of-rule)
;; terminal: #x [0-9A-Fa-f]+
((eq token 'hash)
(setq ebnf-ebx-lex (ebnf-ebx-character))
'terminal)
;; terminal: "
string
"
((eq token 'double-quote)
(setq ebnf-ebx-lex (ebnf-ebx-string ?\"))
'terminal)
;; terminal: 'string'
((eq token 'single-quote)
(setq ebnf-ebx-lex (ebnf-ebx-string ?\'))
'terminal)
;; terminal: [ ^? ( char ( - char )? )+ ]
((eq token 'begin-square)
(setq ebnf-ebx-lex (ebnf-ebx-range))
'terminal)
;; non-terminal: NAME
((eq token 'non-terminal)
(setq ebnf-ebx-lex
(ebnf-buffer-substring ebnf-ebx-non-terminal-chars))
'non-terminal)
;; colon: ::=
((eq token 'colon)
(or (looking-at "
:
:=
")
(error "
Missing
`
:
:=
'
token
"))
(forward-char 3)
'production)
;; miscellaneous: (, ), *, +, ?, |, -
(t
(forward-char)
token)
))))
;; replace the range "
\177-\237
" (see `ebnf-range-regexp').
(defconst ebnf-ebx-constraint-chars
(ebnf-range-regexp "
^\000-\010\016-\037]
" ?\177 ?\237))
(defun ebnf-ebx-skip-constraint ()
(or (> (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0)
(error "
Invalid
character
"))
(or (= (following-char) ?\])
(error "
Missing
end
of
constraint
`
]
'
"))
(forward-char)
t)
(defun ebnf-ebx-skip-end-of-rule ()
(let (eor-p)
(while (progn
;; end of rule ==> 2 or more consecutive end of lines
(setq eor-p (or (> (skip-chars-forward "
\r\n
" ebnf-limit) 1)
eor-p))
;; skip spaces
(skip-chars-forward "
\013\t
" ebnf-limit)
;; skip comments
(and (= (following-char) ?/)
(ebnf-ebx-skip-comment))))
(not eor-p)))
;; replace the range "
\177-\237
" (see `ebnf-range-regexp').
(defconst ebnf-ebx-comment-chars
(ebnf-range-regexp "
^\000-\010\016-\037\\*
" ?\177 ?\237))
(defconst ebnf-ebx-filename-chars
(ebnf-range-regexp "
^\000-\037\\*
" ?\177 ?\237))
(defun ebnf-ebx-skip-comment ()
(forward-char)
(or (= (following-char) ?*)
(error "
Invalid
beginning
of
comment
"))
(forward-char)
(cond
;; open EPS file
((and ebnf-eps-executing (= (following-char) ?\[))
(ebnf-eps-add-context (ebnf-ebx-eps-filename)))
;; close EPS file
((and ebnf-eps-executing (= (following-char) ?\]))
(ebnf-eps-remove-context (ebnf-ebx-eps-filename)))
;; any other action in comment
(t
(setq ebnf-action (aref ebnf-comment-table (following-char))))
)
(while (progn
(skip-chars-forward ebnf-ebx-comment-chars ebnf-limit)
(or (= (following-char) ?*)
(error "
Missing
end
of
comment
"))
(forward-char)
(and (/= (following-char) ?/)
(< (point) ebnf-limit))))
;; check for a valid end of comment
(and (>= (point) ebnf-limit)
(error "
Missing
end
of
comment
"))
(forward-char)
t)
(defun ebnf-ebx-eps-filename ()
(forward-char)
(let (fname nchar)
(while (progn
(setq fname
(concat fname
(ebnf-buffer-substring ebnf-ebx-filename-chars)))
(and (< (point) ebnf-limit)
(> (setq nchar (skip-chars-forward "
*
" ebnf-limit)) 0)
(< (point) ebnf-limit)
(/= (following-char) ?/)))
(setq fname (concat fname (make-string nchar ?*))
nchar nil))
(if (or (not nchar) (= nchar 0))
fname
(and (< (point) ebnf-limit)
(= (following-char) ?/)
(setq nchar (1- nchar)))
(concat fname (make-string nchar ?*)))))
;; replace the range "
\240-\377
" (see `ebnf-range-regexp').
(defconst ebnf-ebx-double-string-chars
(ebnf-range-regexp "
\t
-!#-~
" ?\240 ?\377))
(defconst ebnf-ebx-single-string-chars
(ebnf-range-regexp "
\t
-&
(
-~
" ?\240 ?\377))
(defun ebnf-ebx-string (delim)
(buffer-substring-no-properties
(progn
(forward-char)
(point))
(progn
(skip-chars-forward (if (= delim ?\")
ebnf-ebx-double-string-chars
ebnf-ebx-single-string-chars)
ebnf-limit)
(or (= (following-char) delim)
(error "
Missing
string
delimiter
`
%c
'
" delim))
(prog1
(point)
(forward-char)))))
(defun ebnf-ebx-character ()
;; #x [0-9A-Fa-f]+
(buffer-substring-no-properties
(point)
(progn
(ebnf-ebx-hex-character)
(point))))
(defun ebnf-ebx-range ()
;; [ ^? ( char ( - char )? )+ ]
(buffer-substring-no-properties
(point)
(progn
(forward-char)
(and (= (following-char) ?^)
(forward-char))
(and (= (following-char) ?-)
(forward-char))
(while (progn
(ebnf-ebx-any-character)
(when (= (following-char) ?-)
(forward-char)
(ebnf-ebx-any-character))
(and (/= (following-char) ?\])
(< (point) ebnf-limit))))
(and (>= (point) ebnf-limit)
(error "
Missing
end
of
character
range
`
]
'
"))
(forward-char)
(point))))