def _build_pmid_query(interm):
    """Build the PubMed search string for one rsID or a sequence of terms.

    Args:
        interm: Either a single string, to which "rs" is prepended, or a
            list/tuple of search terms that are OR-ed together verbatim
            (no "rs" prefix is added to sequence items).

    Returns:
        The search string, restricted to the ``pubmed_snp_cited[sb]`` subset.

    Raises:
        TypeError: If ``interm`` is neither a string nor a list/tuple.
        ValueError: If ``interm`` is an empty string or an empty sequence.
    """
    if isinstance(interm, str):
        if not interm:
            raise ValueError("interm must not be an empty string")
        terms = "rs" + interm
    elif isinstance(interm, (list, tuple)):
        if not interm:
            raise ValueError("interm must not be an empty sequence")
        terms = "(" + " OR ".join(interm) + ")"
    else:
        # Previously an unsupported type fell through and returned None,
        # which hid caller mistakes; fail loudly instead.
        raise TypeError(
            "interm must be a str or a list of str, not "
            + type(interm).__name__
        )
    return terms + " AND pubmed_snp_cited[sb]"


def get_pmids(interm, retmax=100000):
    """Search PubMed for records citing the given SNP term(s).

    Args:
        interm: A single string (searched as "rs" + interm) or a list of
            search terms that are combined with OR. The single-string form
            is kept as a convenience for callers holding one ID.
        retmax: Value passed to ESearch as ``retmax``. Defaults to 100000,
            the value this function has always used.

    Returns:
        The parsed result of ``Entrez.esearch`` as returned by
        ``Entrez.read``.

    Raises:
        TypeError: If ``interm`` is neither a string nor a list/tuple.
        ValueError: If ``interm`` is empty.
    """
    # Validate and build the query first so bad input never hits the network.
    query = _build_pmid_query(interm)

    handle = Entrez.esearch(db="pubmed", term=query, retmax=retmax,
                            usehistory="y")
    try:
        search_results = Entrez.read(handle)
    finally:
        # Release the underlying connection even if parsing fails.
        handle.close()

    print("Found a total of " + search_results["Count"]
          + " results using search string '" + query + "'")
    return search_results


def main():
    """Fetch every rsID and run a PubMed search for each one.

    When ``DEBUG`` is truthy, all rsIDs are printed before any search starts.

    Returns:
        An empty tuple (kept for compatibility with existing callers).
    """
    rsids = get_complete_rsids()

    if DEBUG:
        for rsid in rsids:
            print(rsid)

    for rsid in rsids:
        get_pmids(rsid)
        # Pause between requests; presumably to stay under NCBI's
        # request-rate limit while no API key is configured.
        time.sleep(1)

    return ()