aboutsummaryrefslogtreecommitdiff
path: root/ncbi
diff options
context:
space:
mode:
author Jonathan DeMasi <jonathan.demasi@colorado.edu> 2019-06-18 13:48:30 -0600
committer Jonathan DeMasi <jonathan.demasi@colorado.edu> 2019-06-18 13:48:30 -0600
commit 5f34a35553a2c58d21096822ecb4e79c669f4521 (patch)
tree 83205ffc5816b78269a8a5426b44db7939801e21 /ncbi
parent 6f59cd431b1fba5fe06a6687c78f991878562d61 (diff)
download snippets-5f34a35553a2c58d21096822ecb4e79c669f4521.tar
snippets-5f34a35553a2c58d21096822ecb4e79c669f4521.tar.gz
snippets-5f34a35553a2c58d21096822ecb4e79c669f4521.tar.bz2
snippets-5f34a35553a2c58d21096822ecb4e79c669f4521.tar.lz
snippets-5f34a35553a2c58d21096822ecb4e79c669f4521.tar.xz
snippets-5f34a35553a2c58d21096822ecb4e79c669f4521.tar.zst
snippets-5f34a35553a2c58d21096822ecb4e79c669f4521.zip
pushing all changes with working ncbiutils
Diffstat (limited to 'ncbi')
-rwxr-xr-x ncbi/dbsnp/dbsnp.py 75
-rw-r--r--[-rwxr-xr-x] ncbi/dbsnp/ncbiutils.py 11
2 files changed, 22 insertions, 64 deletions
diff --git a/ncbi/dbsnp/dbsnp.py b/ncbi/dbsnp/dbsnp.py
index 9878df7..abbe326 100755
--- a/ncbi/dbsnp/dbsnp.py
+++ b/ncbi/dbsnp/dbsnp.py
@@ -1,58 +1,31 @@
#!/usr/bin/env python3
import time
+import ncbiutils
from Bio import Entrez
-import xml.etree.ElementTree as ET
-DEBUG = True
-Entrez.email = "jonathan.demasi@colorado.edu"
-# We should apply for an API key so we get more queries/sec
-Entrez.api_key = None
-
+"""
+Finds all rsids that are explicitly cited in pubmed
+and returns a list
+"""
def get_complete_rsids():
- rsidlist = []
- numresults = 0
- retstart = 0
- search_string = "snp_pubmed_cited[sb]"
- search_results = Entrez.read(Entrez.esearch(db="snp", term=search_string,
- retmax=100000, retstart=retstart, usehistory="y"))
- print("Found a total of " +
- search_results["Count"] + " results using search string '" + search_string + "'")
- numresults = search_results["Count"]
- rsidlist = rsidlist + search_results["IdList"]
- additional_queries = int(int(numresults) / 100000)
- while additional_queries != 0:
- retstart = retstart + 100000
- search_results = Entrez.read(Entrez.esearch(db="snp", term=search_string,
- retmax=100000, retstart=retstart, usehistory="y"))
- rsidlist = rsidlist + search_results["IdList"]
- additional_queries = additional_queries - 1
+ results = ncbiutils.db_query(db="snp",retmode="json",retmax=200000,retstart=0,term='snp_pubmed_cited[sb]')
+ rsidlist = results["esearchresult"]["idlist"]
+ for x in range(0, len(rsidlist)):
+ rsidlist[x] = "rs" + rsidlist[x]
return(rsidlist)
-def get_pmids(interm):
- # This is obsolete now, essentially, but
- # allows a user to pass a single string
- # which can be nice.
- if isinstance(interm, str):
- interm = "rs" + interm + " AND pubmed_snp_cited[sb]"
- search_results = Entrez.read(Entrez.esearch(db="pubmed", term=interm,
- retmax=100000,
- usehistory="y"))
- print("Found a total of " +
- search_results["Count"] + " results using search string '" + interm + "'")
- return(search_results)
-
- elif isinstance(interm, list):
- searchstring = " OR ".join(interm)
- searchstring = "(" + searchstring + ") AND pubmed_snp_cited[sb]"
- search_results = Entrez.read(Entrez.esearch(db="pubmed",
- term=searchstring,
- retmax=100000,
- usehistory="y"))
- print("Found a total of " +
- search_results["Count"] + " results using search string'" + searchstring + "'")
- return(search_results)
+"""
+Generates a list of PMIDs that are explicitly cite a given rsid
+"""
+def get_pmids(rsid):
+ searchterm = rsid + "+AND+pubmed_snp_cited[sb]"
+ print(searchterm)
+ results = ncbiutils.db_query(db="pubmed",retmode="json",retmax=200000,restart=0,term=searchterm, api_key="7c0213f7c513fa71fe2cb65b4dfefa76fb09")
+ pmidlist = results["esearchresult"]["idlist"]
+ print(pmidlist)
+ return(pmidlist)
"""
@@ -95,14 +68,8 @@ def get_abstracts_from_list(pmids_list):
def main():
rsids = get_complete_rsids()
- if DEBUG:
- for x in rsids:
- print(x)
- for x in rsids:
- pmids = get_pmids(x)
- abstracts = get_abstracts_from_list(pmids)
- for thing in abstracts:
- print(thing)
+ for rsid in rsids:
+ get_pmids(rsid)
return()
if __name__ == '__main__':
diff --git a/ncbi/dbsnp/ncbiutils.py b/ncbi/dbsnp/ncbiutils.py
index 8fb998b..b3c199a 100755..100644
--- a/ncbi/dbsnp/ncbiutils.py
+++ b/ncbi/dbsnp/ncbiutils.py
@@ -10,18 +10,9 @@ def db_query(**kwargs):
for key, value in kwargs.items():
args.append(key+"="+str(value))
qstring = "&".join(args)
- print(qstring)
resp = requests.get(BASE_URL + qstring)
if resp.status_code == 200:
results = resp.json()
return(results)
else:
- print("You've encountered an error and we can't return your results")
-
-def main():
- results = db_query(db="snp", term="snp_pubmed_cited[sb]", retmax=200000, retstart=1000, retmode="json")
- print(results)
- return()
-
-if __name__ == '__main__':
- main()
\ No newline at end of file
+ print("You've encountered an error and we can't return your results")
\ No newline at end of file