# Loops and functions
# ===================
#1)
for (i in 1:1000) print(i)

#2)
total <- 0
for (i in 1:1000) {
  total <- total + i
}
total

#3) 
divideByTwo <- function(x) return(x/2)
divideByTwo(10)

#4) 
ninetynine <- function(x) return(99)
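ninetynine(5)  # 99 (the argument is ignored)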

#5)
addThem <- function(x,y) return(x+y)
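addThem(3, 4)  # 7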


# One take on how to scrape Wikipedia page views (via the stats.grok.se JSON API)
  
getData <- function(url){
  # download one month of page-view data in JSON format and return it
  # as a data frame with columns rd.views and date
  require(rjson)
  raw.data <- readLines(url, warn = FALSE)
  rd <- fromJSON(raw.data)
  rd.views <- rd$daily_views      # daily view counts, named by date
  rd.views <- unlist(rd.views)
  rd <- as.data.frame(rd.views)
  rd$date <- rownames(rd)         # move the dates out of the rownames
  rownames(rd) <- NULL
  return(rd)
}
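
# example: fetch one month of views for a single article (assumes the
# stats.grok.se service is still reachable)
# getData("http://stats.grok.se/json/en/201201/Data_mining")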

  
getUrls <- function(y1,y2,term){
  # build one URL per month for a term between years y1 and y2
  # (the API expects zero-padded months, e.g. "201101" for January 2011)
  urls <- NULL
  for (year in y1:y2){
    for (month in 1:12){
      urls <- c(urls, paste("http://stats.grok.se/json/en/", year,
                            sprintf("%02d", month), "/", term, sep=""))
    }
  }
  return(urls)
}
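
# example: one URL per month, so two years of one term gives 24 URLs
head(getUrls(2011, 2012, "Data_mining"))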
  
getStats <- function(y1,y2,terms){
  #function to download data for each term
  #returns a dataframe
  output <- NULL
  for (term in terms){
    urls <- getUrls(y1,y2,term)
    
    results <- NULL
    for (url in urls){
      print(url)
      results <- rbind(results,getData(url))
    }
    results$term <- term
    
    output <- rbind(output,results)
  }
  return(output)
}
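
# the result has one row per day per term, with columns rd.views
# (daily page views), date, and term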
  
visualiseStats <- function(input){
  #function to visualise data from the getStats function
  require(lubridate)
  require(ggplot2)
  input$date <- as.Date(input$date)
  ggplot(input,aes(date,rd.views,colour=term))+geom_line()
}

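# download 2011-2012 page views for two example articles and plot them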
input <- getStats(2011,2012,c("Data_mining","Web_scraping"))
visualiseStats(input)