stevenbeales
12/28/2018 - 2:27 AM

Adverse Events

# exec(open('GetAdverseEventsForOTdrugs.py').read())

import requests
import json

# Three lookup tables, all keyed by drug name.
Drugs = {} # Count of the different adverse events per drug
DrugsAE = {} # The adverse events for each drug
Drugs_ChEMBLID = {} # Drug name to ChEMBL ID mapping

# Read in the ChEMBL tsv file to pull out the list of unique drugs
filename = "Hackathon_July2018/evs_chembl.tsv"

# Zero-based positions of the only two columns this script uses.
# Column order of a row: DiseaseId, Disease, DrugName, DrugType, ChEMBL_ID,
# maxPhaseForDisease, DrugAction, DrugDescription, GeneID, GeneSymbol, EvidenceID
DRUG_NAME_COLUMN = 2
CHEMBL_ID_COLUMN = 4

# The handle is deliberately not called `input`: at module level that name
# would keep shadowing the builtin after the `with` block ends.
with open(filename, 'r') as tsv_file:
    for line_number, row in enumerate(tsv_file, start=1):
        if not row.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # Strip only the line terminator. A bare rstrip() would also remove
        # trailing tabs and silently drop empty trailing columns.
        fields = row.rstrip('\r\n').split('\t')
        if len(fields) <= CHEMBL_ID_COLUMN:
            raise ValueError(
                "{}: line {} has {} tab-separated fields, need at least {}".format(
                    filename, line_number, len(fields), CHEMBL_ID_COLUMN + 1))
        drug_name = fields[DRUG_NAME_COLUMN]
        if drug_name not in Drugs_ChEMBLID:
            # Only the first row seen for a drug decides its ID.
            # The slice drops the first 39 characters of the ChEMBL_ID column,
            # presumably a fixed-length URI prefix such as
            # 'http://identifiers.org/chembl.compound/' -- TODO confirm against the data.
            Drugs_ChEMBLID[drug_name] = fields[CHEMBL_ID_COLUMN][39:]


# Go through unique drug names and check for which API call to openFDA returns data.
# The loop runs over Drugs_ChEMBLID (filled while reading the TSV). Drugs itself
# starts out empty and is only filled inside this loop, so iterating over Drugs
# would never send a single request.
for currDrug in Drugs_ChEMBLID:
    print(currDrug)
    # NOTE(review): the drug name is appended to the search expression unquoted;
    # multi-word names may need quoting for an exact phrase match -- confirm
    # against the openFDA query syntax before changing.
    try:
        response = requests.get("https://api.fda.gov/drug/event.json",
                                params={"search": "patient.drug.medicinalproduct:"+currDrug,
                                        "count": "patient.reaction.reactionmeddrapt.exact"},
                                timeout=30)  # never hang forever on a stalled connection
    except requests.RequestException as err:
        # A network failure for one drug should not abort the whole run.
        print("request failed for " + currDrug + ": " + str(err))
        continue
    if response.status_code == 200:
        # Parse the body once and reuse it for both dictionaries.
        results = response.json()['results']
        Drugs[currDrug] = len(results)
        print(len(results))
        DrugsAE[currDrug] = results

import collections

# Distribution of the per-drug adverse-event counts:
# key = number of different adverse events, value = number of drugs with that many.
# (The previous argument, `Targets`, is not defined anywhere in this script.)
c = collections.Counter(Drugs.values())
print(c)

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Scatter plot of the counter `c`: x = its keys, y = its values.
fig, ax = plt.subplots()
# Materialise the dict views as lists so matplotlib receives real sequences;
# keys() and values() of the same dict iterate in matching order.
event_counts = list(c.keys())
drug_counts = list(c.values())
ax.scatter(event_counts, drug_counts)
ax.set(xlabel='Number of different adverse events', ylabel='Number of drugs',
       title='Distribution of number of adverse events')
ax.grid()
# Save before show(): the figure is written to disk even if the window is closed.
fig.savefig("AdverseEventCounts_all.png")
plt.show()

#  *** Save the data in TSV format: DrugName, ChEMBL_ID, AdverseEvent, Count ***
with open('drug_adverse_events.tsv', 'w') as tsv_out:
    # Header row first.
    header = ['drug_name', 'drug_ChEMBL_ID', 'adverse_event', 'adverse_event_count']
    tsv_out.write('\t'.join(header) + '\n')
    # Then one row per (drug, adverse event) pair, for every drug in Drugs.
    for drug_name in Drugs:
        for event in DrugsAE[drug_name]:
            columns = [
                drug_name,
                Drugs_ChEMBLID[drug_name],
                event['term'],
                str(event['count']),
            ]
            tsv_out.write('\t'.join(columns) + '\n')


# *** Write JSON lines into a file ***
# One JSON object per line, one line per drug in Drugs.
with open('drug_adverse_events.json', 'w') as jsonl_out:
    for drug_name in Drugs:
        # The record is built before the guard below, so the ID and
        # adverse-event lookups happen for every drug.
        record = {
            'drug_name': drug_name,
            'drug_id': Drugs_ChEMBLID[drug_name],
            'adverse_events': DrugsAE[drug_name],
        }
        # Skip drugs whose count is still the empty-string placeholder.
        if Drugs[drug_name] == '':
            continue
        jsonl_out.write(json.dumps(record) + '\n')