# BoyerMoore def search(pattern, text): print ("pattern =", pattern) print ("text =", text) n = len(text) m = len(pattern) print ("m = %d, n = %d" %(m,n)) badSymbolTable = [m]*128 # ASCII characters populateBadSymbolTable(badSymbolTable, pattern, m-1) goodSuffixTable = [m]*m populateGoodSuffixTable(pattern, goodSuffixTable) i = m -1 while i < n: printLineup(pattern, text, i) k = 0 while k < m and pattern[m-1-k]==text[i-k]: k += 1; if k==m: return i - m + 1 t1 = badSymbolTable[ord(text[i-k])] d1 = max(1, t1 - k) print ("i = ", i, " k = ", k," t1 = ", t1, " d1 = ", d1, end=" ") if k==0: print() i += d1 else: i += max(d1, goodSuffixTable[k]) print (" d2 = ", goodSuffixTable[k]) return -1 def populateGoodSuffixTable(pattern, goodSuffixTable): m = len(pattern) for k in range(1, m): found = False matchedSuffix = pattern[m-k : ] # print ("matched suffix = ", matchedSuffix) start = m-k-1 while start > 0: # print ("possible match ", pattern[start:start+k]) if pattern[start : start+k] == matchedSuffix: if pattern[start-1] != pattern[m-k-1]: goodSuffixTable[k] = (m - k) - start found = True break start -= 1 if not found: # print ("not found") for j in range(k, 0, -1): # print (pattern[ : j] + " =? " + pattern[m-j : m]) if pattern[ : j] == pattern[m-j : m]: goodSuffixTable[k] = m-j # print ("match") found = True break # print ("default: ", goodSuffixTable[k]) print ("GoodSuffixTable:", end=" ") for i in range(1, m): print ("(" + str(i) + "," + str(goodSuffixTable[i]) + ")",end=" ") print () def populateBadSymbolTable(table, pattern, mMinusOne): for i in range(mMinusOne): table[ord(pattern[i])] = mMinusOne - i print ("shiftTable: ") for c in pattern: print (c + str(table[ord(c)]),end=" ") print() def printLineup(pattern, text, i): print (text) print (" "*(i - len(pattern) + 1) + pattern) #print (search("ab", "abc")) #print (search("ab", "aabc")) #print (search("ab", "acabc")) #print (search("abd", "acabc")) #print (search("wowwow", "awowwow")) print (search("banana", "anna and nan in a cabana ate a banal banana")) #print (search ("abracadabra", "abracadabtabradabracadabcadaxbrabbracadabraxxxxxxabracadabracadabra")) #print (search ("abcdcbcabcabc", "abracadabtabradabracadabcadaxbrabbracadabraxxxxxxabracadabracadabra")) #print (search ("hollyholdsalcohol", "sallyholdsalcoholwhyodoeshollyholdalcohol?hollyholdsalcohol")) #print (search("abcdcbcabcabc", "abc")) #print(search("wowwow", "bracadaabrabadabraabracacabracadabracadabra")) #print(search("banana", "bracadaabrabadabraabracacabracadabracadabra")) #print(search("holdalcohol", "bracadaabrabadabraabracacabracadabracadabra"))