aboutsummaryrefslogtreecommitdiff
path: root/ncbi/dbsnp/dbsnp.py
blob: 3861d965ea23d8228db575541c462e937b3fa42c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3

import ncbiutils
from lxml import etree

"""
Finds all rsids that are explicitly cited in pubmed
and returns a list
"""
def get_complete_rsids():
    """Return the ids matching ``snp_pubmed_cited[sb]`` in the ``snp`` db.

    Runs one ``ncbiutils.esearch`` request and prefixes each entry of the
    returned ``idlist`` with ``"rs"``.

    Returns:
        The ``idlist`` list from the search result, with every entry
        rewritten in place to carry the ``"rs"`` prefix.
    """
    # NOTE(review): a single request with retmax=200000 is issued and no
    # paging is done; confirm the service actually returns that many ids.
    response = ncbiutils.esearch(
        db="snp",
        retmode="json",
        retmax=200000,
        retstart=0,
        term='snp_pubmed_cited[sb]',
    )
    rsidlist = response["esearchresult"]["idlist"]
    # Slice assignment keeps the same list object, as the index loop did.
    rsidlist[:] = ["rs" + uid for uid in rsidlist]
    return rsidlist

"""
Generates a list of PMIDs that explicitly cite a given rsid
"""
def get_pmids(rsid):
    """Return the PubMed ids whose records explicitly cite ``rsid``.

    Searches the ``pubmed`` db for ``<rsid>+AND+pubmed_snp_cited[sb]`` via
    ``ncbiutils.esearch`` and prints both the search term and the ids found.

    Args:
        rsid: SNP identifier string used as the first part of the search
            term (e.g. a value produced by ``get_complete_rsids``).

    Returns:
        The ``idlist`` list from the search result.
    """
    searchterm = rsid + "+AND+pubmed_snp_cited[sb]"
    print(searchterm)
    # NOTE(review): the API key is hard-coded in source control; it should
    # be moved to configuration or the environment, and rotated.
    results = ncbiutils.esearch(
        db="pubmed",
        retmode="json",
        retmax=200000,
        # Was misspelled "restart"; "retstart" is the E-utilities offset
        # parameter and matches the call in get_complete_rsids.
        retstart=0,
        term=searchterm,
        api_key="7c0213f7c513fa71fe2cb65b4dfefa76fb09",
    )
    pmidlist = results["esearchresult"]["idlist"]
    print(pmidlist)
    return pmidlist


"""
Takes a pmid and returns the abstract text
"""
def get_abstract(pmid):
    """Fetch a PubMed record and return the text of its abstract sections.

    Args:
        pmid: PubMed identifier, passed to ``ncbiutils.efetch`` as ``id``.

    Returns:
        A list with one string per ``AbstractText`` element found in the
        fetched XML; empty when the record has none. The list is also
        printed.
    """
    raw = ncbiutils.efetch(db="pubmed", id=pmid, rettype="abstract")
    xml = etree.fromstring(raw)
    # itertext() also collects text nested inside or following inline child
    # elements; .text alone stops at the first child and is None when the
    # element begins with one.
    abstracts = ["".join(a.itertext()) for a in xml.xpath('//AbstractText')]
    print(abstracts)
    # Previously the collected abstracts were discarded and () was returned.
    return abstracts

def main():
    """Walk every cited rsid, look up its citing PMIDs, fetch each abstract.

    Calls ``get_complete_rsids`` once, then ``get_pmids`` for each rsid and
    ``get_abstract`` for each PMID returned. Results are not collected here.

    Returns:
        An empty tuple.
    """
    for snp_id in get_complete_rsids():
        for article_id in get_pmids(snp_id):
            get_abstract(article_id)
    return ()

# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()