felipealbrecht
10/1/2015 - 12:41 PM

DNA motif matching operations - Finding peaks that overlap with the TATAAA sequence

DNA motif matching operations - Finding peaks that overlap with the TATAAA sequence

import xmlrpclib
import time

# Endpoint of the DeepBlue XML-RPC server and the key used to authenticate.
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc"
user_key = "anonymous_key"

# Seconds to sleep between two polls of the request state.
POLL_INTERVAL = 1

# allow_none=True is required because None is passed as an argument below.
server = xmlrpclib.Server(url, allow_none=True)


def _unwrap(response, action):
    """Return the payload of a ``(status, payload)`` server response.

    Per the DeepBlue API documentation, commands answer with a status string
    ("okay" or "error") followed by the result; on "error" the second element
    is the error message.

    Args:
        response: the two-element sequence returned by a server command.
        action: short name of the command, used only in the error message.

    Raises:
        RuntimeError: if the status is "error". Failing here keeps an error
            message from being passed on as if it were a query/request id.
    """
    status, payload = response
    if status == "error":
        raise RuntimeError("%s failed: %s" % (action, payload))
    return payload


def _wait_for_request(request_id):
    """Poll the server until the request reaches the "done" state.

    The state is read from ``info[0]["state"]`` of the ``info`` command and is
    polled every POLL_INTERVAL seconds.

    Raises:
        RuntimeError: if the request ends in the "failed" state, since there
            is no data to download in that case.
    """
    while True:
        info = _unwrap(server.info(request_id, user_key), "info")
        state = info[0]["state"]
        if state == "done":
            return
        if state == "failed":
            raise RuntimeError(
                "request %s failed: %s" % (request_id, info[0]))
        time.sleep(POLL_INTERVAL)


# Find all locations where the motif TATAAA appears on chromosome chr1 of the
# GRCh38 genome.
# NOTE(review): the None, None, False arguments are presumably start, end and
# the overlap flag - confirm against the find_motif documentation.
tataa_regions = _unwrap(
    server.find_motif("TATAAA", "GRCh38", "chr1", None, None, False, user_key),
    "find_motif")

# Select the data from 2 experiments:
#   BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed
#   S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed
# The experiments are selected by name, restricted to the area of
# chromosome 1 from position 0 to 50,000,000.
query_id = _unwrap(
    server.select_experiments(
        ["BL-2_c01.ERX297416.H3K27ac.bwa.GRCh38.20150527.bed",
         "S008SGH1.ERX406923.H3K27ac.bwa.GRCh38.20150728.bed"],
        "chr1", 0, 50000000, user_key),
    "select_experiments")

# Keep only the experiment regions that overlap with the motif locations.
overlapped = _unwrap(
    server.intersection(query_id, tataa_regions, user_key),
    "intersection")

# Request the regions of the overlap. The @NAME meta-column is used to include
# the experiment name and @BIOSOURCE the experiment's biosource.
# NOTE(review): @LENGTH and @SEQUENCE presumably add the region length and its
# DNA sequence - confirm against the DeepBlue meta-column documentation.
request_id = _unwrap(
    server.get_regions(
        overlapped,
        "CHROMOSOME,START,END,SIGNAL_VALUE,PEAK,@NAME,@BIOSOURCE,@LENGTH,@SEQUENCE",
        user_key),
    "get_regions")

# Wait for the server to finish processing the request.
_wait_for_request(request_id)

regions = _unwrap(
    server.get_request_data(request_id, user_key),
    "get_request_data")

# Parenthesized single-argument form prints identically under Python 2 and is
# also valid Python 3 syntax.
print(regions)