Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
emacs
emacs
Commits
f4646fff
Commit
f4646fff
authored
Apr 16, 2009
by
Andreas Schwab
Browse files
(boyer_moore): Use zero as marker value for a possible
match instead of depending on overflow behavior.
parent
05fcb8da
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
74 additions
and
98 deletions
+74
-98
src/ChangeLog
src/ChangeLog
+5
-0
src/search.c
src/search.c
+69
-98
No files found.
src/ChangeLog
View file @
f4646fff
2009-04-16 Andreas Schwab <schwab@linux-m68k.org>
* search.c (boyer_moore): Use zero as marker value for a possible
match instead of depending on overflow behavior.
2009-04-16 Chong Yidong <cyd@stupidchicken.com>
2009-04-16 Chong Yidong <cyd@stupidchicken.com>
* keyboard.c (adjust_point_for_property): Disable 2009-02-12
* keyboard.c (adjust_point_for_property): Disable 2009-02-12
...
...
src/search.c
View file @
f4646fff
...
@@ -1729,9 +1729,8 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -1729,9 +1729,8 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
{
{
int
direction
=
((
n
>
0
)
?
1
:
-
1
);
int
direction
=
((
n
>
0
)
?
1
:
-
1
);
register
int
dirlen
;
register
int
dirlen
;
int
infinity
,
limit
,
stride_for_teases
=
0
;
int
limit
,
stride_for_teases
=
0
;
register
int
*
BM_tab
;
int
BM_tab
[
0400
];
int
*
BM_tab_base
;
register
unsigned
char
*
cursor
,
*
p_limit
;
register
unsigned
char
*
cursor
,
*
p_limit
;
register
int
i
,
j
;
register
int
i
,
j
;
unsigned
char
*
pat
,
*
pat_end
;
unsigned
char
*
pat
,
*
pat_end
;
...
@@ -1747,37 +1746,28 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -1747,37 +1746,28 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
int
translate_prev_byte3
=
0
;
int
translate_prev_byte3
=
0
;
int
translate_prev_byte4
=
0
;
int
translate_prev_byte4
=
0
;
BM_tab
=
(
int
*
)
alloca
(
0400
*
sizeof
(
int
));
/* The general approach is that we are going to maintain that we know */
/* the first (closest to the present position, in whatever direction */
/* we're searching) character that could possibly be the last */
/* (furthest from present position) character of a valid match. We */
/* advance the state of our knowledge by looking at that character */
/* and seeing whether it indeed matches the last character of the */
/* pattern. If it does, we take a closer look. If it does not, we */
/* move our pointer (to putative last characters) as far as is */
/* logically possible. This amount of movement, which I call a */
/* stride, will be the length of the pattern if the actual character */
/* appears nowhere in the pattern, otherwise it will be the distance */
/* from the last occurrence of that character to the end of the */
/* pattern. */
/* As a coding trick, an enormous stride is coded into the table for */
/* characters that match the last character. This allows use of only */
/* a single test, a test for having gone past the end of the */
/* permissible match region, to test for both possible matches (when */
/* the stride goes past the end immediately) and failure to */
/* match (where you get nudged past the end one stride at a time). */
/* Here we make a "mickey mouse" BM table. The stride of the search */
/* is determined only by the last character of the putative match. */
/* If that character does not match, we will stride the proper */
/* distance to propose a match that superimposes it on the last */
/* instance of a character that matches it (per trt), or misses */
/* it entirely if there is none. */

/* The general approach is that we are going to maintain that we know
   the first (closest to the present position, in whatever direction
   we're searching) character that could possibly be the last
   (furthest from present position) character of a valid match. We
   advance the state of our knowledge by looking at that character
   and seeing whether it indeed matches the last character of the
   pattern. If it does, we take a closer look. If it does not, we
   move our pointer (to putative last characters) as far as is
   logically possible. This amount of movement, which I call a
   stride, will be the length of the pattern if the actual character
   appears nowhere in the pattern, otherwise it will be the distance
   from the last occurrence of that character to the end of the
   pattern. If the amount is zero we have a possible match. */

/* Here we make a "mickey mouse" BM table. The stride of the search
   is determined only by the last character of the putative match.
   If that character does not match, we will stride the proper
   distance to propose a match that superimposes it on the last
   instance of a character that matches it (per trt), or misses
   it entirely if there is none. */
dirlen
=
len_byte
*
direction
;
dirlen
=
len_byte
*
direction
;
infinity
=
dirlen
-
(
lim_byte
+
pos_byte
+
len_byte
+
len_byte
)
*
direction
;
/* Record position after the end of the pattern. */
/* Record position after the end of the pattern. */
pat_end
=
base_pat
+
len_byte
;
pat_end
=
base_pat
+
len_byte
;
...
@@ -1787,23 +1777,14 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -1787,23 +1777,14 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
if
(
direction
<
0
)
if
(
direction
<
0
)
base_pat
=
pat_end
-
1
;
base_pat
=
pat_end
-
1
;
BM_tab_base
=
BM_tab
;
/* A character that does not appear in the pattern induces a
BM_tab
+=
0400
;
stride equal to the pattern length. */
j
=
dirlen
;
/* to get it in a register */
for
(
i
=
0
;
i
<
0400
;
i
++
)
/* A character that does not appear in the pattern induces a */
BM_tab
[
i
]
=
dirlen
;
/* stride equal to the pattern length. */
while
(
BM_tab_base
!=
BM_tab
)
{
*--
BM_tab
=
j
;
*--
BM_tab
=
j
;
*--
BM_tab
=
j
;
*--
BM_tab
=
j
;
}
/* We use this for translation, instead of TRT itself.
/* We use this for translation, instead of TRT itself.
We fill this in to handle the characters that actually
We fill this in to handle the characters that actually
occur in the pattern. Others don't matter anyway! */
occur in the pattern. Others don't matter anyway! */
bzero
(
simple_translate
,
sizeof
simple_translate
);
for
(
i
=
0
;
i
<
0400
;
i
++
)
for
(
i
=
0
;
i
<
0400
;
i
++
)
simple_translate
[
i
]
=
i
;
simple_translate
[
i
]
=
i
;
...
@@ -1828,12 +1809,10 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -1828,12 +1809,10 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
}
}
i
=
0
;
i
=
0
;
while
(
i
!=
infinity
)
while
(
i
!=
dirlen
)
{
{
unsigned
char
*
ptr
=
base_pat
+
i
;
unsigned
char
*
ptr
=
base_pat
+
i
;
i
+=
direction
;
i
+=
direction
;
if
(
i
==
dirlen
)
i
=
infinity
;
if
(
!
NILP
(
trt
))
if
(
!
NILP
(
trt
))
{
{
/* If the byte currently looking at is the last of a
/* If the byte currently looking at is the last of a
...
@@ -1861,7 +1840,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -1861,7 +1840,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
else
else
j
=
*
ptr
;
j
=
*
ptr
;
if
(
i
==
infinity
)
if
(
i
==
dirlen
)
stride_for_teases
=
BM_tab
[
j
];
stride_for_teases
=
BM_tab
[
j
];
BM_tab
[
j
]
=
dirlen
-
i
;
BM_tab
[
j
]
=
dirlen
-
i
;
...
@@ -1894,17 +1873,16 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -1894,17 +1873,16 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
{
{
j
=
*
ptr
;
j
=
*
ptr
;
if
(
i
==
infinity
)
if
(
i
==
dirlen
)
stride_for_teases
=
BM_tab
[
j
];
stride_for_teases
=
BM_tab
[
j
];
BM_tab
[
j
]
=
dirlen
-
i
;
BM_tab
[
j
]
=
dirlen
-
i
;
}
}
/* stride_for_teases tells how much to stride if we get a */
/* match on the far character but are subsequently */
/* disappointed, by recording what the stride would have been */
/* for that character if the last character had been */
/* different. */

/* stride_for_teases tells how much to stride if we get a
   match on the far character but are subsequently
   disappointed, by recording what the stride would have been
   for that character if the last character had been
   different. */
}
}
infinity
=
dirlen
-
infinity
;
pos_byte
+=
dirlen
-
((
direction
>
0
)
?
direction
:
0
);
pos_byte
+=
dirlen
-
((
direction
>
0
)
?
direction
:
0
);
/* loop invariant - POS_BYTE points at where last char (first
/* loop invariant - POS_BYTE points at where last char (first
char if reverse) of pattern would align in a possible match. */
char if reverse) of pattern would align in a possible match. */
...
@@ -1948,43 +1926,34 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -1948,43 +1926,34 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
p_limit
=
BYTE_POS_ADDR
(
limit
);
p_limit
=
BYTE_POS_ADDR
(
limit
);
p2
=
(
cursor
=
BYTE_POS_ADDR
(
pos_byte
));
p2
=
(
cursor
=
BYTE_POS_ADDR
(
pos_byte
));
/* In this loop, pos + cursor - p2 is the surrogate for pos */
/* In this loop, pos + cursor - p2 is the surrogate for pos. */
while
(
1
)
/* use one cursor setting as long as i can */
while
(
1
)
/* use one cursor setting as long as i can */
{
{
if
(
direction
>
0
)
/* worth duplicating */
if
(
direction
>
0
)
/* worth duplicating */
{
{
/* Use signed comparison if appropriate
while
(
cursor
<=
p_limit
)
to make cursor+infinity sure to be > p_limit.
{
Assuming that the buffer lies in a range of addresses
if
(
BM_tab
[
*
cursor
]
==
0
)
that are all "positive" (as ints) or all "negative",
goto
hit
;
either kind of comparison will work as long
as we don't step by infinity. So pick the kind
that works when we do step by infinity. */
if
((
EMACS_INT
)
(
p_limit
+
infinity
)
>
(
EMACS_INT
)
p_limit
)
while
((
EMACS_INT
)
cursor
<=
(
EMACS_INT
)
p_limit
)
cursor
+=
BM_tab
[
*
cursor
];
else
while
((
EMACS_UINT
)
cursor
<=
(
EMACS_UINT
)
p_limit
)
cursor
+=
BM_tab
[
*
cursor
];
cursor
+=
BM_tab
[
*
cursor
];
}
}
}
else
else
{
{
if
((
EMACS_INT
)
(
p_limit
+
infinity
)
<
(
EMACS_INT
)
p_limit
)
while
(
cursor
>=
p_limit
)
while
((
EMACS_INT
)
cursor
>=
(
EMACS_INT
)
p_limit
)
{
cursor
+=
BM_tab
[
*
cursor
];
if
(
BM_tab
[
*
cursor
]
==
0
)
else
goto
hit
;
while
((
EMACS_UINT
)
cursor
>=
(
EMACS_UINT
)
p_limit
)
cursor
+=
BM_tab
[
*
cursor
];
cursor
+=
BM_tab
[
*
cursor
];
}
}
}
/* If you are here, cursor is beyond the end of the searched region. */
/* If you are here, cursor is beyond the end of the
/* This can happen if you match on the far character of the pattern, */
searched region. You fail to match within the
/* because the "stride" of that character is infinity, a number able */
permitted region and would otherwise try a character
/* to throw you well beyond the end of the search. It can also */
beyond that region. */
/* happen if you fail to match within the permitted region and would */
break
;
/* otherwise try a character beyond that region */
if
((
cursor
-
p_limit
)
*
direction
<=
len_byte
)
hit:
break
;
/* a small overrun is genuine */
cursor
-=
infinity
;
/* large overrun = hit */
i
=
dirlen
-
direction
;
i
=
dirlen
-
direction
;
if
(
!
NILP
(
trt
))
if
(
!
NILP
(
trt
))
{
{
...
@@ -2056,8 +2025,8 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -2056,8 +2025,8 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
pos_byte
+=
cursor
-
p2
;
pos_byte
+=
cursor
-
p2
;
}
}
else
else
/* Now we'll pick up a clump that has to be done the hard */
/* way because it covers a discontinuity */

/* Now we'll pick up a clump that has to be done the hard
   way because it covers a discontinuity. */
{
{
limit
=
((
direction
>
0
)
limit
=
((
direction
>
0
)
?
BUFFER_CEILING_OF
(
pos_byte
-
dirlen
+
1
)
?
BUFFER_CEILING_OF
(
pos_byte
-
dirlen
+
1
)
...
@@ -2069,19 +2038,21 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -2069,19 +2038,21 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
and still be valid for a possible match. */
and still be valid for a possible match. */
while
(
1
)
while
(
1
)
{
{
/* This loop can be coded for space rather than */
/* speed because it will usually run only once. */
/* (the reach is at most len + 21, and typically */
/* does not exceed len) */

/* This loop can be coded for space rather than
   speed because it will usually run only once.
   (the reach is at most len + 21, and typically
   does not exceed len). */
while
((
limit
-
pos_byte
)
*
direction
>=
0
)
while
((
limit
-
pos_byte
)
*
direction
>=
0
)
pos_byte
+=
BM_tab
[
FETCH_BYTE
(
pos_byte
)];
{
/* now run the same tests to distinguish going off the */
int
ch
=
FETCH_BYTE
(
pos_byte
);
/* end, a match or a phony match. */
if
(
BM_tab
[
ch
]
==
0
)
if
((
pos_byte
-
limit
)
*
direction
<=
len_byte
)
goto
hit2
;
break
;
/* ran off the end */
pos_byte
+=
BM_tab
[
ch
];
/* Found what might be a match.
}
Set POS_BYTE back to last (first if reverse) pos. */
break
;
/* ran off the end */
pos_byte
-=
infinity
;
hit2:
/* Found what might be a match. */
i
=
dirlen
-
direction
;
i
=
dirlen
-
direction
;
while
((
i
-=
direction
)
+
direction
!=
0
)
while
((
i
-=
direction
)
+
direction
!=
0
)
{
{
...
@@ -2110,7 +2081,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
...
@@ -2110,7 +2081,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
/* Above loop has moved POS_BYTE part or all the way
/* Above loop has moved POS_BYTE part or all the way
back to the first pos (last pos if reverse).
back to the first pos (last pos if reverse).
Set it once again at the last (first if reverse) char. */
Set it once again at the last (first if reverse) char. */
pos_byte
+=
dirlen
-
i
-
direction
;
pos_byte
+=
dirlen
-
i
-
direction
;
if
(
i
+
direction
==
0
)
if
(
i
+
direction
==
0
)
{
{
int
position
,
start
,
end
;
int
position
,
start
,
end
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment