This post is inspired by The Art of Pi by Nadieh Bremer. The code is taken almost wholesale, and unabashedly, from her GitHub repo for the project.

## Get the data:

First, read in the happiness rankings of words.

library(jsonlite)
# Path to the JSON file holding the word happiness rankings.
fileLoc <- "/Users/Nick/Dropbox/vandy/musicCityStats/nashvilleBioStats/assets/happinessRanks.json"

# Parse the file and retain just two columns of its `objects` table:
# the word itself and its happiness ranking.
happsRanks <- fromJSON(txt = fileLoc, flatten = TRUE)$objects[, c("word", "happs")]

Convert a story into a vector of its individual words.

This story is the New York Times article: Sued Over Old Debt, and Blocked From Suing Back

The story's text is loaded in a hidden block as the variable `story` to save space.

# Split a story into a vector of lowercase words.
#
# story: character string holding the text; only its first element is used.
# Returns a character vector of words. Tokens are separated on any run of
# whitespace (spaces, tabs, newlines), punctuation at the start or end of a
# token is removed so that e.g. "debt," becomes "debt", and empty tokens are
# dropped.
storyToWords <- function(story) {
  story <- tolower(story) # normalize case before splitting
  tokens <- strsplit(story, "[[:space:]]+")[[1]]
  # Strip only leading/trailing punctuation; inner characters such as the
  # apostrophe in "don't" are kept.
  tokens <- gsub("^[[:punct:]]+|[[:punct:]]+$", "", tokens)
  tokens[nzchar(tokens)]
}

# Break the article text held in `story` into its individual words.
sampleWords <- storyToWords(story)

Convert the vector of the words into their respective happiness rankings.

# Look up the happiness ranking of each word.
#
# words: character vector of words to score.
# ranks: table with a `word` column and a matching `happs` column; defaults to
#        the global `happsRanks` table.
# Returns the `happs` values of the words found in `ranks`, in the order the
# words appear; words without a ranking are dropped.
wordsToHappiness <- function(words, ranks = happsRanks) {
  # match() gives the row of the first ranking entry for each word (NA when the
  # word is unknown), so each word yields exactly one value.
  rows <- match(words, ranks$word)
  # Drop unknown words by position rather than via a 0 sentinel, so a genuine
  # ranking of 0 is not mistaken for "unknown".
  as.vector(ranks$happs[rows[!is.na(rows)]])
}

# Score the story's words, keeping only those that have a happiness ranking.
sampleHappiness <- wordsToHappiness(sampleWords)

Line Chart Style:

Combine these values into a dataframe and plot a simple line chart of the happiness trends.

# One row per scored word: `val` is the happiness ranking and `order` is the
# word's position among the scored words.
values <- data.frame(
  val = sampleHappiness,
  order = seq_along(sampleHappiness),
  stringsAsFactors = FALSE
)

# Raw happiness by word position with a smoothed trend drawn on top.
ggplot(values, aes(x = order, y = val)) +
  geom_line(color = "steelblue") +
  theme_bw() +
  # line thickness is `size`; `width` is not a geom_smooth parameter
  geom_smooth(color = "white", size = 2) +
  # both axis labels must be named, otherwise the x label is never applied
  labs(x = "word position", y = "Happiness")

plot of chunk unnamed-chunk-6

Now convert the values into random-walk-style coordinates.

N       <- length(sampleHappiness)
largest <- max(sampleHappiness)

# Heading of each step as a fraction of a full turn: rankings are divided by
# the largest observed value so the headings span the circle. Without this
# scaling, 2 * pi * value wraps around and only the fractional part of each
# ranking would affect the direction.
angles <- (pi * 2) * (sampleHappiness / largest)

# The walk starts at (0, 0); every later point is the previous point plus a
# unit step in the direction given by that word's angle. cumsum() performs the
# same running sum as an index loop but also copes with 0 or 1 values.
x <- cumsum(c(0, sin(angles[-1])))[seq_len(N)]
y <- cumsum(c(0, cos(angles[-1])))[seq_len(N)]

Add these new values to the dataframe made earlier.

# Attach the walk coordinates plus two index columns to the data frame.
values$x <- x
values$y <- y
# seq_along() instead of 1:length(x), which would yield c(1, 0) for an empty walk
values$ID <- seq_along(x)
# ID modulo Ncols; Ncols is not defined in the visible code
# NOTE(review): confirm Ncols is set (e.g. in a hidden chunk) before this runs
values$ColID <- seq_along(x) %% Ncols

# Random Walk Style:

Plot the random walk style chart.

# Draw the walk as a single path, colored by each step's happiness ranking.
valPlot <- ggplot(values[seq_len(N), ], aes(x = x, y = y, group = "1")) +
  # refer to the column directly; `values$val` inside aes() bypasses the data
  # passed to ggplot()
  geom_path(aes(color = factor(val)), size = 0.5) +
  coord_fixed(ratio = 1) +
  theme_bw() +
  # `line` was listed twice; a repeated named argument makes the theme() call
  # fail, so it is given only once here
  theme(line = element_blank(),
        text = element_blank(),
        title = element_blank(),
        legend.position = "none",
        panel.border = element_blank(),
        panel.background = element_blank())
plot(valPlot)

plot of chunk unnamed-chunk-9