#!/usr/bin/env python3
"""Fetch dbSNP record IDs from NCBI Entrez for a search term.

Reconstructed from the patch "fetching results" (commit 0cae42c6a6fc,
Jonathan DeMasi, 2019-06-17) against ncbi/dbsnp/dbsnp.py.
"""

import sys
import time
from Bio import Entrez


Entrez.email = "jonathan.demasi@colorado.edu"
# We should apply for an API key so we get more queries/sec
Entrez.api_key = None

# Defaults match the values that were previously hard-coded in
# get_complete_rsids().
DEFAULT_SEARCH_STRING = "snp_pubmed_cited[sb]"
DEFAULT_BATCH_SIZE = 100000


def _esearch_page(search_string, retstart, retmax):
    """Run a single esearch on the "snp" database and return the parsed result.

    The handle returned by ``Entrez.esearch`` is always closed, even when
    parsing raises.
    """
    handle = Entrez.esearch(db="snp", term=search_string,
                            retmax=retmax, retstart=retstart, usehistory="y")
    try:
        return Entrez.read(handle)
    finally:
        handle.close()


def get_complete_rsids(search_string=DEFAULT_SEARCH_STRING,
                       batch_size=DEFAULT_BATCH_SIZE):
    """Return every ID that esearch reports for ``search_string``.

    Results are fetched page by page until the total reported in the first
    response ("Count") has been collected, or a page comes back empty.

    Args:
        search_string: Entrez search term to run against the "snp" database.
        batch_size: Number of IDs requested per esearch call (``retmax``).

    Returns:
        A list of the IDs taken from the "IdList" field of each page, in the
        order they were returned.

    Raises:
        ValueError: If ``batch_size`` is not a positive integer.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    rsids = []
    total = None
    retstart = 0
    while total is None or retstart < total:
        page = _esearch_page(search_string, retstart, batch_size)
        if total is None:
            # Only the first response is used for the total and the report.
            total = int(page["Count"])
            print("Found a total of " + page["Count"]
                  + " results using search string '" + search_string + "'")
        ids = page["IdList"]
        if not ids:
            # Nothing more to read; stop rather than loop on empty pages.
            break
        rsids.extend(ids)
        # Advance by what was actually received, so a page shorter than
        # batch_size does not cause records to be skipped.
        retstart += len(ids)
    return rsids


def main():
    """Script entry point: fetch the complete ID list."""
    # The returned list is not used for anything yet.
    get_complete_rsids()


if __name__ == '__main__':
    main()