philippmuench
10/1/2017 - 6:25 PM

simulate CRISPR sequences

simulate CRISPR sequences

#!/usr/bin/env python

import random
import sys

# Spacer set: how many spacers one array holds and how long each spacer is.
setsize_min, setsize_max = 2, 8  # bounds on the number of spacers
minlength, maxlength = 30, 40  # bounds on the length of a single spacer

# Palindromic repeat: bounds on the requested repeat length.
repeat_length_min, repeat_length_max = 15, 35

# Neighboring (flanking) sequence: bounds on the requested length.
# The "neightbor" spelling is kept because other code refers to these names.
neightbor_min, neightbor_max = 1000, 1500

def simulate_random_sequence(length):
	"""Return a random DNA string made of `length` bases.

	Every base is picked independently with random.choice from A, C, G, T.
	A `length` of zero or less yields an empty string.
	"""
	alphabet = ['A', 'C', 'G', 'T']
	return ''.join(random.choice(alphabet) for _ in range(length))

def simulate_repeat(length):
	"""Return a random palindromic DNA string of exactly `length` bases.

	A random first half is generated and then mirrored by plain string
	reversal (not the reverse complement), so the result reads the same
	forwards and backwards.  For an odd `length` one extra random base is
	placed in the centre; without it odd requests would come back one base
	short of what was asked for.

	Args:
		length: requested number of bases; fractional values are truncated.

	Returns:
		The palindrome, or an empty string when `length` is zero or less.
	"""
	# Truncate first so float lengths are still accepted.
	length = int(length)
	if length <= 0:
		return ''
	dna = ['A', 'C', 'G', 'T']
	half = ''.join(random.choice(dna) for _ in range(length // 2))
	# An odd-length palindrome needs a centre base that mirrors onto itself.
	centre = random.choice(dna) if length % 2 else ''
	return half + centre + half[::-1]

def simulate_crispr(num, file):
	"""Write `num` simulated CRISPR arrays to the text file `file`.

	One array is written per line.  Every array uses a single repeat shared
	by all of its spacers and is laid out as

		left flank, then repeat + spacer for each spacer, then right flank

	Args:
		num: number of arrays (lines) to write.
		file: path of the output file; it is opened in 'w' mode, so an
			existing file is overwritten.
	"""
	# The context manager closes the file even if generation raises.
	with open(file, 'w') as handle:
		# range(num), not range(1, num): the latter writes one record too few.
		for _ in range(num):
			# create repeat
			repeat = simulate_repeat(random.randint(repeat_length_min, repeat_length_max))
			# create spacer set
			spacers = [
				simulate_random_sequence(random.randint(minlength, maxlength))
				for _ in range(random.randint(setsize_min, setsize_max))
			]
			# create neighboring sequence
			# NOTE(review): the flanks are built with simulate_repeat, i.e. they
			# are palindromes; simulate_random_sequence may have been intended.
			left = simulate_repeat(random.randint(neightbor_min, neightbor_max))
			right = simulate_repeat(random.randint(neightbor_min, neightbor_max))
			# join to final crispr: keep every spacer (not only the last one)
			# and place the flanks around the array.
			array = ''.join(repeat + spacer for spacer in spacers)
			handle.write(left + array + right + '\n')

# Script entry: request 100000 simulated arrays and store them in
# 'crisprs.txt', a path relative to the current working directory.
simulate_crispr(num=100000, file='crisprs.txt')