Whilst checking out the solution video of suffix arrays, I found that I made something substantially different from the actual suffix array search.
This is my code:
class SAS(object):
def __init__(self, string):
self.string_SA = []
for i in range(0, len(string)):
self.string_SA.append(string[i:])
self.string_SA = sorted(self.string_SA)
#find_shortest:
#Find the shortest substring that has this beginning. In the example above, if I search for
#”abra” then it should return ”abra”, not ”abracadabra”.
def find_shortest(self, suffix, middle = None, length_array = None):
suffix_array = self.string_SA
length_suffix = len(suffix)
#find middle of to be searched suffix array
if middle == None:
middle = round((len(suffix_array) / 2))
length_array = round((len(suffix_array) / 2))
#if the suffix is in the middle
if suffix in suffix_array[middle][0:length_suffix]:
#check for shorter versions to the right and to the left
shortest = suffix_array[middle]
found_middle_right = middle + 1
found_middle_left = middle - 1
if suffix_array[found_middle_right]:
while suffix in suffix_array[found_middle_right][0:length_suffix]:
if len(suffix_array[found_middle_right]) < len(shortest):
shortest = (suffix_array[found_middle_right])
found_middle_right += 1
if suffix_array[found_middle_left]:
while suffix in suffix_array[found_middle_left][0:length_suffix]:
if len(suffix_array[found_middle_left]) < len(shortest):
shortest = (suffix_array[found_middle_left])
found_middle_left -= 1
return shortest
if length_array <= 1:
print("not found")
return
#if the suffix is lexaconically smaller than the middle of the array
if suffix < suffix_array[middle]:
length_array = round((length_array / 2))
middle = middle - length_array
return self.find_shortest(suffix, middle, length_array)
#if the suffix is lexaconically greater than the middle of the array
if suffix > suffix_array[middle]:
length_array = round((length_array / 2))
middle = middle + length_array
return self.find_shortest(suffix, middle, length_array)
else:
print("not found")
return
#find_longest:
def find_longest(self, suffix, middle = None, length_array = None):
suffix_array = self.string_SA
length_suffix = len(suffix)
#find middle of to be searched suffix array
if middle == None:
middle = round((len(suffix_array) / 2))
length_array = round((len(suffix_array) / 2))
#if the suffix is in the middle
if suffix in suffix_array[middle][0:length_suffix]:
#check for shorter versions to the right and to the left
longest = suffix_array[middle]
found_middle_right = middle + 1
found_middle_left = middle - 1
if suffix_array[found_middle_right]:
while suffix in suffix_array[found_middle_right][0:length_suffix]:
if len(suffix_array[found_middle_right]) > len(longest):
longest = (suffix_array[found_middle_right])
found_middle_right += 1
if suffix_array[found_middle_left]:
while suffix in suffix_array[found_middle_left][0:length_suffix]:
if len(suffix_array[found_middle_left]) > len(longest):
longest = (suffix_array[found_middle_left])
found_middle_left -= 1
return longest
if length_array <= 1:
print("not found")
return
#if the suffix is lexaconically smaller than the middle of the array
if suffix < suffix_array[middle]:
length_array = round((length_array / 2))
middle = middle - length_array
return self.find_longest(suffix, middle, length_array)
#if the suffix is lexaconically greater than the middle of the array
if suffix > suffix_array[middle]:
length_array = round((length_array / 2))
middle = middle + length_array
return self.find_longest(suffix, middle, length_array)
else:
print("not found")
return
#find_all:
def find_all(self, suffix, middle = None, length_array = None):
#Find all substrings that start with this beginning. This means ”abra” returns both ”abra” and
#”abracadabra”.
suffix_array = self.string_SA
length_suffix = len(suffix)
#find middle of to be searched suffix array
if middle == None:
middle = round((len(suffix_array) / 2))
length_array = round((len(suffix_array) / 2))
#if the suffix is in the middle
if suffix in suffix_array[middle][0:length_suffix]:
#check for shorter versions to the right and to the left
list_of_suffix = []
list_of_suffix.append(suffix_array[middle])
found_middle_right = middle + 1
found_middle_left = middle - 1
if suffix_array[found_middle_right]:
while suffix in suffix_array[found_middle_right][0:length_suffix]:
list_of_suffix.append(suffix_array[found_middle_right])
found_middle_right += 1
if suffix_array[found_middle_left]:
while suffix in suffix_array[found_middle_left][0:length_suffix]:
list_of_suffix.append(suffix_array[found_middle_right])
found_middle_left -= 1
return list_of_suffix
if length_array <= 1:
print("not found")
return
#if the suffix is lexaconically smaller than the middle of the array
if suffix < suffix_array[middle]:
length_array = round((length_array / 2))
middle = middle - length_array
return self.find_all(suffix, middle, length_array)
#if the suffix is lexaconically greater than the middle of the array
if suffix > suffix_array[middle]:
length_array = round((length_array / 2))
middle = middle + length_array
return self.find_all(suffix, middle, length_array)
else:
print("not found")
return
#You’ll want to have a good automated test for this, plus have some performance measurements. We’ll
#be using those in later exercises. Once you are done you’ll want to do the Study Drills to complete this
#exercise.
My version doesnt use any index at all. Would it be usefull in any situation, and is it recommended to go back and implement te version from the solution video?