# Usage (from an interactive Python session): exec(open('GetAdverseEventsForOTdrugs.py').read())
import requests
import json
# Three module-level dictionaries, all keyed by drug name.
Drugs = {}           # drug name -> number of adverse-event entries returned by openFDA
DrugsAE = {}         # drug name -> list of adverse-event entries for that drug
Drugs_ChEMBLID = {}  # drug name -> ChEMBL ID

# Read the ChEMBL evidence TSV and record the ChEMBL ID of every unique drug.
# Only Drugs_ChEMBLID is filled here; Drugs and DrugsAE receive their entries
# from the openFDA query loop further down.
filename = "Hackathon_July2018/evs_chembl.tsv"
n = 0  # number of lines read from the file (blank lines included)
# The handle is deliberately not called `input`: this is module-level code, so
# that name would shadow the builtin for everything that runs afterwards.
with open(filename, 'r') as tsv_file:
    for row in tsv_file:
        n += 1
        # Tolerate blank / whitespace-only lines (e.g. a trailing newline at EOF).
        if not row.strip():
            continue
        # Strip only the line terminator. A bare rstrip() would also remove
        # trailing tabs, dropping empty trailing columns and breaking the
        # 11-way unpack below. A row with any other column count still raises
        # ValueError, so malformed data is not silently accepted.
        row = row.rstrip('\r\n')
        (DiseaseId, Disease, DrugName, DrugType, ChEMBL_ID,
         maxPhaseForDisease, DrugAction, DrugDescription,
         GeneID, GeneSymbol, EvidenceID) = row.split('\t')
        # Keep the ID from the first row seen for each drug name.
        if DrugName not in Drugs_ChEMBLID:
            # The first 39 characters of the ChEMBL column are dropped --
            # presumably a fixed URL prefix in front of the bare ID; TODO confirm.
            Drugs_ChEMBLID[DrugName] = ChEMBL_ID[39:]
# Query openFDA once per unique drug name and keep the result of every call
# that returns data (HTTP 200):
#   Drugs[drug]   <- number of entries in the response's 'results' list
#   DrugsAE[drug] <- the 'results' list itself
# The loop walks Drugs_ChEMBLID, the dictionary that holds the unique drug
# names at this point. Drugs is still empty here, so iterating over it would
# never issue a single request.
for currDrug in Drugs_ChEMBLID:
    print(currDrug)
    try:
        response = requests.get(
            "https://api.fda.gov/drug/event.json",
            params={
                "search": "patient.drug.medicinalproduct:" + currDrug,
                "count": "patient.reaction.reactionmeddrapt.exact",
            },
            # Without a timeout a single stalled connection blocks the run forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as err:
        # A network failure for one drug should not discard the data already
        # collected for the others; report it and move on.
        print("Request failed for " + currDrug + ": " + str(err))
        continue
    # Any non-200 answer is skipped without a message, so such drugs simply
    # get no entry in Drugs / DrugsAE.
    # NOTE(review): this would also hide rate-limit rejections -- confirm
    # against the openFDA usage limits if drugs go missing from the output.
    if response.status_code == 200:
        # Decode the JSON body once and reuse it.
        results = response.json()['results']
        Drugs[currDrug] = len(results)
        print(len(results))
        DrugsAE[currDrug] = results
import collections
# Distribution of the per-drug counts: maps "number of adverse-event entries"
# to "number of drugs that have exactly that many". The counter is built from
# the values of Drugs, which matches the axis labels below.
c = collections.Counter(Drugs.values())
print(c)
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
# Hand matplotlib plain lists so the call does not depend on the installed
# version accepting dict views.
ax.scatter(list(c.keys()), list(c.values()))
#plt.gca().set_ylim([-5, 50])
ax.set(xlabel='Number of different adverse events', ylabel='Number of drugs',
       title='Distribution of number of adverse events')
ax.grid()
# Save before show(): the figure is written to disk first, then displayed.
fig.savefig("AdverseEventCounts_all.png")
plt.show()
# *** Dump the collected data as TSV: one row per (drug, adverse event) pair ***
# Columns: drug name, ChEMBL ID, the entry's 'term', the entry's 'count'.
with open('drug_adverse_events.tsv', 'w') as tsv_out:
    header = ('drug_name', 'drug_ChEMBL_ID', 'adverse_event', 'adverse_event_count')
    tsv_out.write('\t'.join(header) + '\n')
    # Every drug that has an entry in Drugs ...
    for currDrug in Drugs:
        # ... contributes one line per adverse-event entry stored for it.
        for event in DrugsAE[currDrug]:
            fields = (
                currDrug,
                Drugs_ChEMBLID[currDrug],
                event['term'],
                str(event['count']),
            )
            tsv_out.write('\t'.join(fields) + '\n')
# *** Write one JSON object per line (JSON Lines), one line per drug ***
with open('drug_adverse_events.json', 'w') as json_out:
    for currDrug in Drugs:
        # Bundle the drug name, its ChEMBL ID and its full adverse-event list.
        record = {
            'drug_name': currDrug,
            'drug_id': Drugs_ChEMBLID[currDrug],
            'adverse_events': DrugsAE[currDrug],
        }
        # A drug whose Drugs entry is the empty string is left out of the file.
        if Drugs[currDrug] == '':
            continue
        json_out.write(json.dumps(record) + '\n')