felipealbrecht
9/24/2015 - 11:58 AM

Use case 2 from the DEEPBLUE poster for the BLUEPRINT annual meeting (09.2015). Find H3K27ac peaks that overlap promoters in all BLUEPRINT

Use case 2 from the DEEPBLUE poster for the BLUEPRINT annual meeting (09.2015). Find H3K27ac peaks that overlap promoters in all BLUEPRINT. Then find the transcription factor peaks that overlap with these enhancers, using ENCODE datasets.

# Aggregate the DNA methylation signal of BLUEPRINT monocyte experiments over the CpG islands of chr1 (hg19).

import xmlrpclib
import time

# Endpoint of the DeepBlue XML-RPC service used by every call below.
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc"
# Key passed as the last argument of each server call.
user_key = "anonymous_key"

# allow_none=True lets the client marshal the None placeholders that some of
# the calls further down pass as arguments.
server = xmlrpclib.ServerProxy(uri=url, allow_none=True)

# Collect every biosource returned as a child of "monocyte"; element [1] of
# each returned entry is used as the biosource name.
(status, monocytes_biosources) = server.get_biosource_children("monocyte", user_key)
monocytes_biosources_names = [biosource[1] for biosource in monocytes_biosources]
print(monocytes_biosources_names)

# Samples of those biosources whose "source" is BLUEPRINT Epigenome.
(status, samples) = server.list_samples(
    monocytes_biosources_names, {"source": "BLUEPRINT Epigenome"}, user_key)
# A bare `samples` expression only displays a value in an interactive session;
# print it explicitly so the script shows it like the other intermediate results.
print(samples)
# Element [0] of each sample entry is used as the sample ID.
samples_ids = [sample[0] for sample in samples]

# Bisulfite-Seq DNA methylation experiments listed for those samples.
(status, experiments) = server.list_experiments(
    "", "DNA Methylation", samples_ids, "Bisulfite-Seq", "BLUEPRINT Epigenome", user_key)
print(experiments)


selected_experiments = []
# NOTE(review): peak_format is not read anywhere in the visible script.
peak_format = "CHROMOSOME,START,END,NAME,SCORE,STRAND,SIGNAL_VALUE,P_VALUE,Q_VALUE"

# Keep only the "signal" experiments whose FILE_TYPE metadata is
# BS_METH_CALL_CNAG. One info() call is issued per listed experiment.
for candidate in experiments:
    # candidate[0] is the ID of the experiment.
    (status, info) = server.info(candidate[0], user_key)
    details = info[0]
    if details["data_type"] != "signal":
        continue
    if details["extra_metadata"]["FILE_TYPE"] != "BS_METH_CALL_CNAG":
        continue
    selected_experiments.append(candidate)

# Element [1] of each kept entry is used as the experiment name.
experiment_names = [entry[1] for entry in selected_experiments]
print(experiment_names)

# Select the regions of the chosen experiments on chr1.
# NOTE(review): query_id is not consumed by the rest of the visible script.
(status, query_id) = server.select_regions(
    experiment_names, None, None, None, None, None, "chr1", None, None, user_key)

# Select the "Cpg Islands" annotation (hg19, chr1); its key is the set of
# regions passed to score_matrix.
(status, annotation_key) = server.select_annotations(
    "Cpg Islands", "hg19", "chr1", None, None, user_key)

# Map every selected experiment name to the column to aggregate ("VALUE").
experiments_columns = dict.fromkeys(experiment_names, "VALUE")
print(experiments_columns)

# Ask the server for the "mean" of those columns over the annotation regions;
# the returned value identifies the request that is polled afterwards.
(status, request) = server.score_matrix(
    experiments_columns, "mean", annotation_key, user_key)

# Poll the score-matrix request every 5 seconds until its state is either
# "done" or "error".
(status, info) = server.info(request, user_key)
while info[0]["state"] not in ("done", "error"):
    time.sleep(5)
    print(info)
    (status, info) = server.info(request, user_key)

# Download the result of the request that was polled above. `request` is the
# identifier returned by score_matrix (no `request_id` name exists in the script).
(status, data) = server.get_request_data(request, user_key)