classIndex(object): def__init__(self, t, k): """创建不同长度的索引""" self.k = k # kmer 长度 self.index = [] for i inrange(len(t) - k + 1): # 循环每个k-mer self.index.append((t[i:i+k], i)) # 追加(k-mer, offset) self.index.sort() # 按字母顺序排列 defquery(self, p): """返回P第一个k-mer匹配的索引""" kmer = p[:self.k] i = bisect.bisect_left(self.index, (kmer, -1)) # 二分法查找 hits = [] while i < len(self.index): if self.index[i][0] != kmer: break hits.append(self.index[i][1]) i += 1 return hits
defqueryIndex(p, t, index): k = index.k offsets = [] for i in index.query(p): if p[k:] == t[i+k:i+len(p)]: # 验证P剩下的字符 len(p) - k offsets.append(i) return offsets
t = 'ACTTGGAGATCTTTGAGGCTAGGTATTCGGGATCGAAGCTCATTTCGGGGATCGATTACGATATGGTGGGTATTCGGGA' p = 'GGTATTCGGGA'
index = Index(t, 4) print(queryIndex(p, t, index))