count_repeat
def count_repeat(seq, min_window, max_window):
repeat_count_by_window = {}
for window in range(min_window, max_window+1):
print("window {0} -----------------------------------------".format(window))
seq_parts = set([])
for start_index in range(0, len(seq)):
focus_letters = seq[start_index:start_index+window]
if len(focus_letters) == window:
seq_parts.add(focus_letters)
print("seq_parts: {0}".format(seq_parts))
repeat_count = 0
for seq_part in seq_parts:
#print("{0}: {1}".format(seq_part, seq.count(seq_part)))
repeat_count += seq.count(seq_part)
repeat_count_by_window[window] = repeat_count
#print("repeat: {0}".format(repeat_count_by_window[window]))
print("============================")
print(repeat_count_by_window)
return repeat_count_by_window
n = "MRVKTFVILCCALQYVAYTNANINDFDEDYFGSDVTVQSSNTTDEIIRDASGAVIEEQITTKKMQRKNKNHGILGKNEKMIKTFVITTDSDGNESIVEEDVLMKTLSDGTVAQSYVAADAGAYSQSGPYVSNSGYSTHQGYTSDFSTSAAVGAGAGAGAAAGSGAGAGAGYGAASGAGAGAGAGAGAGYGTGAGAGAGAGYGAGAGAGAGAGYGAGAGAGAGAGYGAGAGAGAGAGYGAGAGAGAGAGYGAGAGAGAGAGYGAASGAGAGAGYGQGVGSGAASGAGAGAGAGSAAGSGAGAGAGTGAGAGYGAGAGAGAGAGYGAASGTGAGYGAGAGAGYGGASGAGAGAGAGAGAGAGAGYGTGAGYGAGAGAGAGAGAGAGYGAGAGAGYGAGYGVGAGAGYGAGYGAGAGSGAASGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGTGAGSGAGAGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGAGYGAGAGAGYGAGAGVGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGVGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVANGGYSRSDGYEYAWSSDFGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGVGYGAGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGVGSGAGAGSGAGAGVGYGAGAGVGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGVGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGAGYGAGYGAGAGAGYGAGAGSGAASGAGSGAGAGSGAGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGVGYGAGYGAGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVAHGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGVGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGVGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVANGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGSGAGAGSGAGAGSGAGAGYGAGVGAGYGVGYGAGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVAHGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAAYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGYGAGAGAGYGAGYGAGAGAGYGAGAGTGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVAHGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGAGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGTGSGAGAGSGAGAGYGAGVGAGYGAGAGSGAAFGAGAGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGVGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVANGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGVGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGYGVGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGVGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGVGYGAGVGAGYGAGAGSGAASGAGAGSGAGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGVGAGYGAGAGVGYGAGAGAGYGAGAGSGAASGAGAGAGSGAGAGTGAGAGSGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVANGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGSGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGVGAGYGVGYGAGAGAGYGVGYGAGAGAGYGAGAGSGTGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGVGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGYGVGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGVGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGVGYGAGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVANGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGYGAGYGAGVGAGYGAGAGVGYGAGAGAGYGAGAGSGAASGAGAGAGAGAGSGAGAGSGAGAGAGSGAGAGYGAGYGIGVGAGYGAGAGVGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGVGAGYGAGAGVGYGAGAGAGYGAGAGSGAASGAGAGAGAGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVNGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGYGAGYGAGVGAGYGAGAGVGYGAGAGAGYGAGAGSGAASGAGAGSGSGAGSGAGAGSGAGAGSGAGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGAGSGAASGAGAGAGAGAGTGSSGFGPYVANGGYSGYEYAWSSESDFGTGSGAGAGSGAGAGSGAGAGYGAGVGAGYGAGYGAGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGAGAGAGYGAGAGVGYGAGAGAGYGAGAGSGAGSGAGAGSGSGAGAGSGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGYGIGVGAGYGAGAGVGYGAGAGAGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGYGAGAGVGYGAGAGSGAASGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGYGAGYGAGVGAGYGAGAGYGAGYGVGAGAGYGAGAGSGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGAGSGAGSGAGAGYGAGAGAGYGAGAGAGYGAGAGSGAASGAGAGAGAGSGAGAGSGAGAGSGAGSGAGAGSGAGAGYGAGAGSGAASGAGAGSGAGAGAGAGAGAGSGAGAGSGAGAGYGAGAGSGAASGAGAGAGAGTGSSGFGPYVANGGYSRREGYEYAWSSKSDFETGSGAASGAGAGAGSGAGAGSGAGAGSGAGAGSGAGAGGSVSYGAGRGYGQGAGSAASSVSSASSRSYDYSRRNVRKNCGIPRRQLVVKFRALPCVNC"
p = "MKPIFLVLLVATSAYAAPSVTINQYSDNEIPRDIDDGKASSVISRAWDYVDDTDKSIAILNVQEILKDMASQGDYASQASAVAQTAGIIAHLSAGIPGDACAAANVINSYTDGVRSGNFAGFRQSLGPFFGHVGQNLNLINQLVINPGQLRYSVGPALGCAGGGRIYDFEAAWDAILASSDSSFLNEEYCIVKRLYNSRNSQSNNIAAYITAHLLPPVAQVFHQSAGSITDLLRGVGNGNDATGLVANAQRYIAQAASQVHV"
s = "AATTATATATAT"
count_repeat(n, 2, 20)