Levenshtein Distance Algorithm: Rscheme Implementation
by Joerg F. Wittenberger
;;** Levenshtein
;; See http://www.merriampark.com/ld.htm
(define-glue (lev-init mx)
{
int i,m = fx2int(mx), *p;
REG0 = bvec_alloc( sizeof(int) * (m+1), byte_vector_class );
p = (int*) PTR_TO_DATAPTR(REG0);
for(i=0; i<=m; ++i) p[i]=i;
RETURN1();
})
(define-glue (lev-dist d n)
{
int *p=PTR_TO_DATAPTR(d);
REG0 = int2fx( p[ fx2int(n) ] );
RETURN1();
})
(define-glue (lev-step! matrix mx ix a b off)
{
#define min(a, b) (((a) < (b)) ? (a) : (b))
int i=fx2int(ix), o=fx2int(off), m=fx2int(mx);
int *d_i = (int*) PTR_TO_DATAPTR(matrix);
unsigned char *s = string_text(a) + o;
unsigned char *t = string_text(b) + o;
int distance=d_i[0], j, left, cost;
d_i[0]=i;
for(j=1; j<=m; ++j) { /* row loop */
left = d_i[j];
/* Step 5 */
cost = s[i-1]==t[j-1] ? 0 : 1;
/* Step 6 */
d_i[j] = min(min(d_i[j-1]+1, left+1), distance+cost);
distance = left;
}
REG0 = int2fx(distance);
RETURN1();
})
(define-safe-glue (lev-0 (a <string>) (b <string>))
{
unsigned char *s = string_text(a);
unsigned char *t = string_text(b);
int i=0, n=string_length(b), m=string_length(a);
/* skip common suffix */
while( m>0 && n>0 && s[m-1]==t[n-1] ) --m, --n;
/* skip common prefix */
while( m>0 && n>0 && s[i] == t[i] ) ++i, --m, --n;
REG0 = int2fx(i);
REG1 = int2fx(m);
REG2 = int2fx(n);
RETURN(3);
})
(define (levenshtein-distance s t)
(receive
(off sl tl) (lev-0 s t)
(cond
((eqv? sl 0) tl)
((eqv? tl 0) sl)
(else
(if (< tl sl)
(lev-exec (lev-init sl) s t off sl tl)
(lev-exec (lev-init tl) t s off tl sl))))))
(define (lev-exec matrix s t o m n)
(do ((i 1 (add1 i)))
((> i n) (lev-dist matrix m))
(lev-step! matrix m i s t o)))
(define (lev-exec< matrix s t o m n limit)
(let loop ((i 1) (distance 0))
(cond
((>= distance limit) #f)
((> i n) (>= (lev-dist matrix m) limit))
(else (loop (add1 i) (lev-step! matrix m i s t o))))))
(define (levenshtein< s t limit)
(receive
(off sl tl) (lev-0 s t)
(cond
((eqv? sl 0) (< tl limit))
((eqv? tl 0) (< sl limit))
(else
(if (< tl sl)
(lev-exec< (lev-init sl) s t off sl tl limit)
(lev-exec< (lev-init tl) t s off tl sl limit))))))