Example: searching for and plotting trending topics in genetics

Load packages in RStudio

library(rentrez)
library(reshape2)
library(ggplot2)

Create a function that counts the PubMed papers matching a search term in each year

# Count the PubMed records matching `search_term` in each year of `years`.
#
# years:       vector of years; each one is used as both `mindate` and
#              `maxdate` of its own entrez_search() call.
# search_term: query string passed to entrez_search() as `term`.
#
# Returns a numeric vector with one count per element of `years`
# (numeric(0) when `years` is empty).
# NOTE(review): no `datetype` is passed, so the date field the mindate/maxdate
# filter applies to is whatever NCBI defaults to -- confirm it is the
# publication date.
papers_by_year <- function(years, search_term) {
  count_for_year <- function(y) {
    # retmax = 0: only the hit count is needed, not the record IDs.
    hits <- entrez_search(
      db = "pubmed", term = search_term,
      mindate = y, maxdate = y, retmax = 0
    )
    as.numeric(hits$count)
  }
  # vapply() always yields a numeric vector (sapply() would return list() for
  # empty input) and fails loudly if a search does not return a single count.
  vapply(years, count_for_year, numeric(1))
}
  • With that we can fetch the data for each term and, by searching with no term, find the total number of papers published in each year:
# Years to query; every year costs one PubMed search per term.
years <- 1990:2015

# Searching with an empty term gives the yearly totals used as the denominator.
total_papers <- papers_by_year(years, "")

# The "-omics" keywords whose popularity is compared.
omics <- c(
  "genomic", "epigenomic", "metagenomic", "proteomic",
  "transcriptomic", "pharmacogenomic", "connectomic"
)

# One column of yearly counts per keyword; rows follow the order of `years`.
trend_data <- sapply(omics, function(keyword) papers_by_year(years, keyword))

# Column-major recycling divides each column by the matching yearly total.
trend_props <- trend_data / total_papers
  • That’s the data, ready to plot:
# Reshape to long format: one row per (year, term) pair, with the term name in
# `variable` and its share of that year's papers in `value`.
trend_df <- melt(data.frame(years, trend_props), id.vars = "years")

# One line per term. `value` is a proportion (term count / yearly total), so
# the axis is labelled as such. On the log scale a proportion of 0 becomes
# -Inf, which is what triggers ggplot2's "infinite values" warning.
p <- ggplot(trend_df, aes(years, value, colour = variable))
p + geom_line(size = 1) + scale_y_log10("proportion of papers")
## Warning: Transformation introduced infinite values in continuous y-axis

Full code

# Count the PubMed records matching `search_term` in each year of `years`.
#
# years:       vector of years; each one is used as both `mindate` and
#              `maxdate` of its own entrez_search() call.
# search_term: query string passed to entrez_search() as `term`.
#
# Returns a numeric vector with one count per element of `years`
# (numeric(0) when `years` is empty).
# NOTE(review): no `datetype` is passed, so the date field the mindate/maxdate
# filter applies to is whatever NCBI defaults to -- confirm it is the
# publication date.
papers_by_year <- function(years, search_term) {
  count_for_year <- function(y) {
    # retmax = 0: only the hit count is needed, not the record IDs.
    hits <- entrez_search(
      db = "pubmed", term = search_term,
      mindate = y, maxdate = y, retmax = 0
    )
    as.numeric(hits$count)
  }
  # vapply() always yields a numeric vector (sapply() would return list() for
  # empty input) and fails loudly if a search does not return a single count.
  vapply(years, count_for_year, numeric(1))
}

# Same year range as the step-by-step walkthrough earlier in this document.
years <- 1990:2015

# Searching with an empty term gives the yearly totals used as the denominator.
total_papers <- papers_by_year(years, "")
omics <- c(
  "genomic", "epigenomic", "metagenomic", "proteomic",
  "transcriptomic", "pharmacogenomic", "connectomic"
)

# One column of yearly counts per term; dividing by `total_papers` recycles
# down each column, turning counts into proportions of that year's papers.
trend_data <- sapply(omics, function(term) papers_by_year(years, term))
trend_props <- data.frame(trend_data / total_papers)
trend_props$years <- years

# Long format: one row per (year, term) pair. `trend_props` is already a data
# frame, so it is passed to melt() directly.
trend_df <- melt(trend_props, id.vars = "years")

# `value` is a proportion, so the axis is labelled as such. On the log scale a
# proportion of 0 becomes -Inf, which triggers ggplot2's "infinite values"
# warning.
p <- ggplot(trend_df, aes(years, value, colour = variable))
p + geom_line(size = 1) + scale_y_log10("proportion of papers")
## Warning: Transformation introduced infinite values in continuous y-axis