aboutsummaryrefslogblamecommitdiff
path: root/ncbi/dbsnp/dbsnp.py
blob: 3861d965ea23d8228db575541c462e937b3fa42c (plain) (tree)
1
2
3
4
5
6
7
8
9
                      
 
                
                      
 



                                                   
                         
                                                                                                                 


                                                 

                    





                                                               
                                                                                                                                                       


                                                 
 

   
                                          
   







                                                                    
 
           
                                
                      


                               


                          
         
#!/usr/bin/env python3

import ncbiutils
from lxml import etree

"""
Finds all rsids that are explicitly cited in pubmed
and returns a list
"""
def get_complete_rsids():
    """Return the ids of all SNPs flagged as cited in PubMed, "rs"-prefixed.

    Runs a single esearch against the "snp" database with the
    ``snp_pubmed_cited[sb]`` subset filter and prepends "rs" to every
    entry of the returned "idlist".

    Returns:
        A list of strings of the form "rs<id>", in the order returned by
        the search.
    """
    response = ncbiutils.esearch(db="snp", retmode="json", retmax=200000, retstart=0, term='snp_pubmed_cited[sb]')
    return ["rs" + uid for uid in response["esearchresult"]["idlist"]]

"""
Generates a list of PMIDs that explicitly cite a given rsid
"""
def get_pmids(rsid):
    """Return the PubMed ids of articles that explicitly cite ``rsid``.

    The search term and the resulting id list are also printed to stdout
    as progress output.

    Args:
        rsid: SNP identifier string to search for (e.g. one of the
            "rs"-prefixed ids produced by get_complete_rsids()).

    Returns:
        The "idlist" entry of the esearch JSON result
        (presumably a list of PMID strings -- confirm against ncbiutils).
    """
    searchterm = rsid + "+AND+pubmed_snp_cited[sb]"
    print(searchterm)
    # NOTE(review): this API key is a credential committed to source control.
    # It should be rotated and loaded from configuration/environment instead.
    results = ncbiutils.esearch(
        db="pubmed",
        retmode="json",
        retmax=200000,
        # Was misspelled "restart"; "retstart" is the E-utilities offset
        # parameter and matches the call in get_complete_rsids().
        retstart=0,
        term=searchterm,
        api_key="7c0213f7c513fa71fe2cb65b4dfefa76fb09",
    )
    pmidlist = results["esearchresult"]["idlist"]
    print(pmidlist)
    return pmidlist


"""
Takes a pmid and returns the abstract text
"""
def get_abstract(pmid):
    """Fetch a PubMed record and return the text of its abstract sections.

    The collected texts are also printed to stdout, as before.

    Args:
        pmid: PubMed id, passed to efetch as ``id``.

    Returns:
        A list with one string per ``AbstractText`` element found in the
        fetched XML (structured abstracts have several); empty if the
        record has no abstract.
    """
    raw = ncbiutils.efetch(db="pubmed", id=pmid, rettype="abstract")
    xml = etree.fromstring(raw)
    # itertext() also gathers text nested inside inline markup (<i>, <sub>,
    # ...); .text alone stops at the first child element and is None when
    # the section starts with one.
    abstracts = ["".join(a.itertext()) for a in xml.xpath('//AbstractText')]
    print(abstracts)
    # Previously returned an empty tuple, discarding the collected text.
    return abstracts

def main():
    """Walk every PubMed-cited SNP and fetch the abstracts that cite it.

    For each rsid from get_complete_rsids(), looks up the citing PMIDs with
    get_pmids() and calls get_abstract() on each one in turn.

    Returns:
        An empty tuple.
    """
    for snp_id in get_complete_rsids():
        for article_id in get_pmids(snp_id):
            get_abstract(article_id)
    return ()

# Run the full rsid -> PMID -> abstract crawl only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()