Implementation of Suffix Tree (Ukkonen)

Revision en1, by skloj, 2020-03-07 10:35:19

After trying many implementations of Suffix Tree and Suffix Array in Python, the fastest I managed to get was based on adamant Ukkonen version: https://codeforces.me/blog/entry/16780

Here is my Python version enhanced with memoization (I tested it with UVA 10679 — I Love Strings!!), happy to receive feedback on how to improve the algorithm:

from sys import stdin, stdout, stderr, setrecursionlimit
from functools import lru_cache
setrecursionlimit(100000)

def read():
    return stdin.readline().rstrip()

def readint():
    return int(read())

def make_node(_pos, _len):
    global s, n, sz, to, link, fpos, slen, pos, node
    fpos[sz] = _pos
    slen[sz] = _len
    sz += 1
    return sz-1

def go_edge():
    global s, n, sz, to, link, fpos, slen, pos, node
    while (pos > slen[to[node].get(s[n - pos], 0)]):
        node = to[node].get(s[n - pos], 0)
        pos -= slen[node]

def add_letter(c):
    global s, n, sz, to, link, fpos, slen, pos, node
    s[n] = c
    n += 1
    pos += 1
    last = 0
    while(pos > 0):
        go_edge()
        edge = s[n - pos]
        v = to[node].get(edge, 0)
        t = s[fpos[v] + pos - 1]
        if (v == 0):
            to[node][edge] = make_node(n - pos, inf)
            link[last] = node
            last = 0
        elif (t == c):
            link[last] = node
            return
        else:
            u = make_node(fpos[v], pos - 1)
            to[u][c] = make_node(n - 1, inf)
            to[u][t] = v
            fpos[v] += pos - 1
            slen[v] -= pos - 1
            to[node][edge] = u
            link[last] = u
            last = u
        if(node == 0):
            pos -= 1
        else:
            node = link[node]

def init_tree(st):
    global slen, ans, inf, maxn, s, to, fpos, slen, link, node, pos, sz, n
    inf = int(1e9)
    maxn = len(st)*2+1 #int(1e6+1)
    s = [0]*maxn
    to = [{} for i in range(maxn)]
    fpos, slen, link = [0]*maxn, [0]*maxn, [0]*maxn
    node, pos = 0, 0
    sz = 1
    n = 0
    slen[0] = inf
    ans = 0
    for c in st:
        add_letter(ord(c))

def traverse_edge(st, idx, start, end):
    global len_text, len_st
    k = start
    while k <= end and k < len_text and idx < len_st:
        if text[k] != st[idx]:
            return -1
        k += 1
        idx += 1
    if idx == len_st:
        return idx
    return 0

def edgelen(v, init, e):
    if(v == 0):
        return 0
    return e-init+1

@lru_cache(maxsize=10000001)
def traverse(v, st, idx):
    global len_st
    r = -1
    init = fpos[v]
    end = fpos[v]+slen[v]
    e = end-1
    if v != 0:
        r = traverse_edge(st, idx, init, e)
        if r != 0:
            if r == -1:
                return []
            return [r]
    idx = idx + edgelen(v, init, e)
    if idx > len_st:
        return []
    k = ord(st[idx])
    children = to[v]
    if k in children:
        vv = children.get(k, 0)
        return traverse(vv, st, idx)
    return []

@lru_cache(maxsize=1001*10)
def solve(T, query):
    traverse.cache_clear()
    return "y\n" if traverse(0, query, 0) else "n\n"

def main():
    global text, len_st, len_text
    k = readint()
    for ki in range(k):
        text = read()+"$"
        len_text = len(text)
        init_tree(text)
        q = readint()
        for qi in range(q):
            query = read()
            len_st = len(query)
            stdout.write(solve(text, query))

main()
Tags #suffix tree, #python, #strings

History

 
 
 
 
Revisions
 
 
  Rev. Lang. By When Δ Comment
en1 English skloj 2020-03-07 10:35:19 3613 Initial revision (published)