diff options
-rwxr-xr-x | ncbi/dbsnp/dbsnp.py | 33 |
1 files changed, 32 insertions, 1 deletions
def _build_pubmed_term(interm):
    """Return the PubMed search term for *interm*.

    A single string is prefixed with "rs"; a list/tuple of strings is
    OR-joined as given (no prefix is added to the individual items).
    Both forms are restricted with the ``pubmed_snp_cited[sb]`` filter.

    Raises:
        TypeError: if *interm* is neither a string nor a list/tuple.
        ValueError: if *interm* is an empty list/tuple.
    """
    snp_cited_filter = "pubmed_snp_cited[sb]"

    if isinstance(interm, str):
        # NOTE(review): only the single-string form gets the "rs" prefix;
        # list items are presumably expected to carry it already -- confirm
        # against the callers.
        return "rs" + interm + " AND " + snp_cited_filter

    if isinstance(interm, (list, tuple)):
        if not interm:
            # An empty sequence would produce the malformed term "() AND ...".
            raise ValueError("get_pmids() needs at least one search term")
        return "(" + " OR ".join(interm) + ") AND " + snp_cited_filter

    raise TypeError(
        "get_pmids() expects a str or a list of str, got "
        + type(interm).__name__
    )


def get_pmids(interm):
    """Search PubMed for records matching one or more terms.

    Args:
        interm: either a single string (searched as ``"rs" + interm``) or a
            list/tuple of strings that are OR-ed together.

    Returns:
        The parsed esearch result as returned by ``Entrez.read``; its
        "Count" entry is printed together with the search term used.

    Raises:
        TypeError: if *interm* is neither a string nor a list/tuple
            (previously this case silently returned ``None``).
        ValueError: if *interm* is an empty list/tuple.
    """
    # Validate and build the term before touching the network.
    searchstring = _build_pubmed_term(interm)

    handle = Entrez.esearch(db="pubmed",
                            term=searchstring,
                            retmax=100000,
                            usehistory="y")
    try:
        search_results = Entrez.read(handle)
    finally:
        # Release the handle even when parsing fails.
        handle.close()

    print("Found a total of " + search_results["Count"]
          + " results using search string '" + searchstring + "'")
    return search_results


def main():
    """Fetch the rsid list and run one PubMed search per rsid.

    When the module-level DEBUG flag is set, every rsid is printed before
    the searches start.  Returns an empty tuple.
    """
    rsids = get_complete_rsids()
    if DEBUG:
        for rsid in rsids:
            print(rsid)
    for rsid in rsids:
        get_pmids(rsid)
        # Pause between queries (presumably to respect the NCBI request
        # rate limit while no API key is configured).
        time.sleep(1)
    return ()