# Get percentiles and mean for each row
get.stats <- function( x )
{
  result <- matrix( nrow=nrow(x), ncol=6, 
                    dimnames=list(NULL, c("Mean", "2.5%", "97.5%", "50%","90%","99%") )  )
  
  for( i in 1:nrow(x))
  {
    result[i,2:6] <- quantile( x[i,], probs=c( 0.025, 0.975,0.5,0.9, 0.99 ))
  }
  result[,1] <- rowMeans( x )
  return( result )
}

get.stats.2d <- function( x )
{
  # Each row is a bootstrap sample.
  # Thus, the stats calculated here, are variability
  x <- get.stats( x )
  
  result <- matrix(get.stats( t( x ) ), 
                   nrow=6, ncol=6, 
                   dimnames=list(variability=c("Mean", "2.5%", "97.5%", "50%","90%","99%"),
                                 uncertainty=c("Mean", "2.5%", "97.5%", "50%","90%","99%")
                                 )  
                   )
  return( result)
}  
  
read.codebook <- function( dont.do=FALSE ){
  codebook <- list(
    questions = read.xlsx( "./Data/codebook VCP-qmra.xlsx", sheet=1 ),
    answers = read.xlsx( "./Data/codebook VCP-qmra.xlsx", sheet=2 ),
    rawlabels = read.xlsx( "./Data/codebook VCP-qmra.xlsx", sheet=4 )
  )
  
  codes = read.xlsx( "./Data/codebook VCP-qmra.xlsx", sheet=3 )
  
  codebook.new <- merge( codebook$answers, codes, by="CODE" )
  codebook.new$CODE <- codebook.new$CODE.SUPP
  codebook.new$CODE.SUPP <- NULL
  codebook.new <- codebook.new[ codebook.new$DESC!="don't know", ]
  if( !dont.do ) codebook.new <- codebook.new[ codebook.new$DESC!="don't do", ]
  codebook.new <- codebook.new[ !is.na(codebook.new$DESC), ]
  codebook$answers <- codebook.new
  
  return( codebook )
}

# Every line of the test.data is a respondent
# Each column is a question and has a name
# The codebook$CODE[i] gives our question number for column name codebook$SPSS.CODE[i]
read.process.data <- function( codebook ){
  test.data <- read.spss("./Data/RIVM Databestand Microbiologische Voedselveiligheid.sav", 
                         use.value.labels=FALSE, to.data.frame=TRUE)

  n <- nrow( codebook$questions )
  newdata <- data.frame( id = 1:nrow(test.data) )
  # Loop over all SPSS codes in the codebook
  for( i in 1:n ){
    # Get the corresponding internal code
    newcol <- codebook$questions$CODE[i]
    if( !is.na( newcol) ){
      if( substr( newcol, 1, 1 )!="H"){
        col <- test.data[, as.character( codebook$questions$SPSS.CODE[i] ) ]
        df <- data.frame( newcol=col)
        colnames( df ) <- newcol
      }else{
         df <- data.frame( newcol=rep(NA, nrow(test.data)))
         df$newcol[1] <- as.character( codebook$questions$dutch.label[i] )
         colnames(df) <- newcol       
      }
      newdata<-cbind( newdata, df )
    }
  }
  newdata$id <- NULL
  newdata$scaling <- test.data[,3]
  return( newdata )
}

new.table.row <- function( result, label, N, dist ){    
  result$stats <- t(result$stats)
  mean   <- with( result, sprintf( "%0.2f (%0.2f,%0.2f)", stats[ "Mean", "Mean"],
                                   stats[ "2.5%", "Mean"], 
                                   stats[ "97.5%", "Mean"] ))
  
  median <- with( result, sprintf( "%0.2f (%0.2f,%0.2f)", stats[ "Mean", "50%"],
                                   stats[ "2.5%", "50%"], 
                                   stats[ "97.5%", "50%"] ))
  
  p9     <- with( result, sprintf( "%0.2f (%0.2f,%0.2f)", stats[ "Mean", "90%"],
                                   stats[ "2.5%", "90%"], 
                                   stats[ "97.5%", "90%"] ))
  
  p99    <- with( result, sprintf( "%0.2f (%0.2f,%0.2f)", stats[ "Mean", "99%"],
                                   stats[ "2.5%", "99%"], 
                                   stats[ "97.5%", "99%"] ))
  
  return( data.frame( code=codes[j],
                      type=as.character( type ),
                      question=label,
                      dist=dist,
                      mean=mean,
                      median=median,
                      p9 = p9,
                      p99 = p99
                    )
          )
}

get.data.counts <- function( to.combine, codes, codebook, data)
{
  j       <- which( codes== to.combine[1] )
  index   <- which( codebook$questions$CODE==codes[j] ) # Index into codebook
  type    <- codebook$questions$answertype[ index ]       # Type 1,2,3 or 4
  label1   <- as.character( codebook$questions$dutch.label[ index ] )
  # Get the meaning and labels of the answer options
  answers.value1 <- with( codebook$answers,
                          codebook$answers[CODE==codes[j], c("ANSWER", "LOW","HIGH", "VALUE")] )
  answers.label1 <- droplevels( na.omit( with( codebook$answers,
                                               codebook$answers[CODE==codes[j], c("ANSWER","DESC")] ) ) )
  answers1   <- as.numeric(data[,j] )
  
  j       <- which( codes== to.combine[2] )
  index   <- which( codebook$questions$CODE==codes[j] ) # Index into codebook
  type    <- codebook$questions$answertype[ index ]       # Type 1,2,3 or 4
  label2   <- as.character( codebook$questions$dutch.label[ index ] )
  
  # Get the meaning and labels of the answer options
  answers.value2 <- with( codebook$answers,
                          codebook$answers[CODE==codes[j], c("ANSWER", "LOW","HIGH", "VALUE")] )
  answers.label2 <- droplevels( na.omit( with( codebook$answers,
                                               codebook$answers[CODE==codes[j], c("ANSWER","DESC")] ) ) )
  answers2   <- as.numeric(data[,j])  # The answers given, one line per respondent
  
  ind.na <- is.na( answers1 ) | is.na( answers2 )
  
  answers <- data.frame( answers1 = answers1[ !ind.na ],
                         answers2 = answers2[ !ind.na ]
  )
  
  ind.exist <- with(answers, (answers1 %in% answers.value1$ANSWER) & (answers2 %in% answers.value2$ANSWER))
  
  answers <- answers[ind.exist,]
  
  data.counts <- with( answers, table( answers1, answers2, dnn=c("q1", "q2")))
  data.counts <- as.data.frame( data.counts )
  
  # Replace in the data, the answer strings with the numerical values
  data.counts <- merge( data.counts, answers.value1, by.x="q1", by.y="ANSWER")
  data.counts <- merge( data.counts, answers.value2, by.x="q2", by.y="ANSWER")
  return( data.counts )
}

optimgamma <- function( data.counts ){
  param <- c( log(10), log(10) )
  sol<-optim( par=param, fn=loglikgamma, f=data.counts )
  ss.hat <- exp( sol$par )
  s <- ss.hat[1]
  r <- ss.hat[2]
  return( list( r=r,s=s ) )
}

gammaplot <- function( data.counts, rs, preference ){
  p <- ggplot( data.counts, x = c(0, 150))
  p <- p + stat_function(fun = pgamma, geom = "line",
                         args=list(shape=rs$s, scale=rs$r, lower.tail=FALSE ))
  p <- p + scale_y_continuous(labels = percent, breaks=seq(0,1,0.1), limits=c(0,1))
  p <- p + scale_x_continuous( limits=c(0,150) )
  p <- p + labs( title = paste( to.combine[1], preference), x="More than X times", y="Population fraction" )
  p + theme_bw()
  ggsave( paste( paste( to.combine,preference,collapse="-"),  "_raw_stats.eps", sep="" ), path="Figuren",
          width=10, height=10, units="cm" )
}

add.to.master.table.gamma <- function( master.table, product, pref, to.combine, perc, rs )
{
  master.table <- rbind( master.table, data.frame(
    product=product,
    code1 = to.combine[1],
    code2 = to.combine[2],
    pref = pref,
    perc.category = perc,
    mean = rs$s * rs$r,
    median = qgamma( 0.5, shape=rs$s, scale=rs$r ),
    p9 = qgamma( 0.9, shape=rs$s, scale=rs$r ),
    p99 = qgamma( 0.99, shape=rs$s, scale=rs$r ))
  )
}

get.props.counts.labels <- function( to.combine, codebook, data ){
  
  codes <- colnames( data ) # Identifiers of question
  
  rawlabels <- filter( codebook$rawlabels, c1==to.combine[1] & c2==to.combine[2]) %>% mutate( preference=as.factor(text) ) 
  product   <- rawlabels$product[1]
  rawlabels <- rawlabels %>% select( c1, c2, preference ) %>% mutate( q2 = as.factor(1:nrow(rawlabels)) )
  
 
  data.counts <- get.data.counts( to.combine, codes, codebook, data)
 
  proportions.aggregated <- data.counts %>% select( q1, q2, Freq ) %>%
    left_join( rawlabels, by="q2" ) %>%
    filter( Freq!=0 ) %>% group_by( preference ) %>% summarise( prop=sum(Freq)) %>% 
    mutate( prop=prop/sum(prop))
  
  data.counts <- data.counts %>% left_join( rawlabels, by="q2" ) %>% left_join( rawlabels, by="q2" ) %>%
    select( Freq, LOW=LOW.x, HIGH=HIGH.x, VALUE=VALUE.x, preference=preference.x )
 
  return( list( data.counts=data.counts, proportions.aggregated=proportions.aggregated, product=product ))
}

optim.plot.add.gamma <- function( y, to.combine, master.table ){
  for( pref in levels(y$data.counts$preference ) ){
    rs <- optimgamma( filter( y$data.counts, preference==pref ) %>% select(-preference) )
    gammaplot( y$data.counts, rs, pref )
    master.table <- add.to.master.table.gamma( master.table, y$product, pref, to.combine,
                                               filter( y$proportions.aggregated, preference==pref) %>% select( prop ) %>% as.numeric(),
                                               rs )
  }
  return( master.table )
}
