aboutsummaryrefslogtreecommitdiff
path: root/ncbi
diff options
context:
space:
mode:
authorJonathan DeMasi <jonathan.demasi@colorado.edu>2019-06-17 22:44:00 -0600
committerJonathan DeMasi <jonathan.demasi@colorado.edu>2019-06-17 22:44:00 -0600
commit1abbe71dfaa852e92248f0d97b6178d67df2071f (patch)
tree3817ccd4ad5b0ab89735682fe6d882b5b22b68e4 /ncbi
parente951e4c41c23e0927ff53ddc263ca054691502d4 (diff)
downloadsnippets-1abbe71dfaa852e92248f0d97b6178d67df2071f.tar
snippets-1abbe71dfaa852e92248f0d97b6178d67df2071f.tar.gz
snippets-1abbe71dfaa852e92248f0d97b6178d67df2071f.tar.bz2
snippets-1abbe71dfaa852e92248f0d97b6178d67df2071f.tar.lz
snippets-1abbe71dfaa852e92248f0d97b6178d67df2071f.tar.xz
snippets-1abbe71dfaa852e92248f0d97b6178d67df2071f.tar.zst
snippets-1abbe71dfaa852e92248f0d97b6178d67df2071f.zip
Added other helper functions to get abstracts and such
Diffstat (limited to 'ncbi')
-rwxr-xr-xncbi/dbsnp/dbsnp.py42
1 files changed, 41 insertions, 1 deletions
diff --git a/ncbi/dbsnp/dbsnp.py b/ncbi/dbsnp/dbsnp.py
index 00ccb07..07d51fa 100755
--- a/ncbi/dbsnp/dbsnp.py
+++ b/ncbi/dbsnp/dbsnp.py
@@ -54,6 +54,46 @@ def get_pmids(interm):
search_results["Count"] + " results using search string'" + searchstring + "'")
return(search_results)
+
+"""
+Takes the saved search results returned by get_pmids and retrieves
+the raw XML to parse. Currently fetches one article per request instead
+of fetching them in bulk. Either approach works; it is not yet
+clear which one is more efficient.
+"""
+
+def get_abstracts(results):
+    """Fetch every record of a saved PubMed search and collect abstract text.
+
+    Records are requested one at a time (retmax=1), stepping retstart from
+    0 up to the reported result count.
+
+    Args:
+        results: Mapping that provides "Count", "WebEnv" and "QueryKey",
+            as returned by get_pmids.
+
+    Returns:
+        A list of strings, one per <AbstractText> element found, in the
+        order the records were fetched. An abstract split into several
+        <AbstractText> sections contributes one entry per section.
+    """
+    abstracts_list = []
+    for start in range(int(results["Count"])):
+        fetch_handle = Entrez.efetch(db="pubmed", rettype="abstract",
+                                     retmode="xml", retstart=start,
+                                     retmax=1,
+                                     webenv=results["WebEnv"],
+                                     query_key=results["QueryKey"])
+        # Release the connection even when reading the response fails.
+        try:
+            data = fetch_handle.read()
+        finally:
+            fetch_handle.close()
+        root = ET.fromstring(data)
+        for abst in root.iter('Abstract'):
+            for sec in abst.iter('AbstractText'):
+                # Element.text stops at the first child element, so inline
+                # markup (<i>, <sub>, <sup>, ...) would truncate the
+                # abstract or yield None; itertext() gathers all of it.
+                abstracts_list.append("".join(sec.itertext()))
+    return abstracts_list
+
+
+def get_abstracts_from_list(pmids_list):
+    """Fetch each PubMed ID individually and collect the abstract text.
+
+    Args:
+        pmids_list: Iterable of PubMed IDs; each one is passed unchanged
+            as the ``id`` argument of Entrez.efetch.
+
+    Returns:
+        A flat list of strings, one per <AbstractText> element found, in
+        the order the IDs were given. The entries are not keyed by PMID.
+    """
+    abstracts_list = []
+    for each_pmid in pmids_list:
+        fetch_handle = Entrez.efetch(db="pubmed", id=each_pmid, retmode='xml')
+        # Release the connection even when reading the response fails.
+        try:
+            data = fetch_handle.read()
+        finally:
+            fetch_handle.close()
+        root = ET.fromstring(data)
+        for abst in root.iter('Abstract'):
+            for sec in abst.iter('AbstractText'):
+                # Element.text stops at the first child element, so inline
+                # markup (<i>, <sub>, <sup>, ...) would truncate the
+                # abstract or yield None; itertext() gathers all of it.
+                abstracts_list.append("".join(sec.itertext()))
+    return abstracts_list
+
def main():
rsids = get_complete_rsids()
if DEBUG:
@@ -61,7 +101,7 @@ def main():
print(x)
for x in rsids:
get_pmids(x)
- time.sleep(1)
+ time.sleep(0.5)
return()
if __name__ == '__main__':