##!
# Functions to carry out analysis of variance based on a data file in the format for ANOVA4 (ANV data)
# ANOVA4形式のデータファイルをもとに分散分析を行うための関数群
#
#  read.anv
#  write.anv
#  as.anv4
#  anv4.fit
#  print.anv4.fit
#  anova.anv4.fit
#
#
# @varsion 0.3
# @date 2011-02-06
# @author Masashi Nakanishi.
# @licence GPL2
#
# @history
#  0.3    2011-02-06
#     changed to use readLines() and read.csv(), instead of s_c_a_n_().
#     replaced e_v_a_l_() with alternatives.
#     fixed bugs of write.anv().
#  0.2    2010-08-05
#     rewrote overall.
#  0.1-5  2006-11-24
#     changed the encoding of this script file to UTF-8.
#  0.1-4  2006-08-26
#     fixed a bug of error check in read.anv()
#  0.1-3  2006-05-23
#     modified 'read.anv()' to make it efficient
#     changed the error details of 'as.anv4()'
#  0.1-2  2006-05-22
#     modified 'read.anv()' slightly
#  0.1-1  2006-05-06
#     the original


# Reads a file in ANV format and creates a data frame from it.
read.anv <- function(anvfile, print.input=FALSE, outcome.label="Outcome", block.label="Subject")
{
    print.input <- if (is.null(print.input) || is.na(print.input)) FALSE else as.logical(print.input)
    outcome.label <- make.names(as.character(outcome.label))
    block.label <- make.names(as.character(block.label))

    if (anvfile == "")
        anvfile <- stdin()
    else if (is.character(anvfile)) {
        anvfile <- file(anvfile, "r")
        on.exit(close(anvfile))
    }
    if (!inherits(anvfile, "connection"))
        stop("'anvfile' must be a character string or connection")

    descript <- readLines(con=anvfile, n=1L, ok=FALSE, warn=FALSE)
    var_names <- readLines(con=anvfile, n=4L, ok=FALSE, warn=FALSE)
    n.variable <- as.vector(as.matrix(read.csv(file=anvfile, header=FALSE, nrows=1L, strip.white=TRUE, blank.lines.skip=FALSE)))
    n.level <- as.vector(as.matrix(read.csv(file=anvfile, header=FALSE, nrows=1L, sep=",", strip.white=TRUE, blank.lines.skip=FALSE)))
    balanced <- as.logical(as.integer(readLines(con=anvfile, n=1L, ok=FALSE, warn=FALSE)))
    sizes <- as.vector(as.matrix(read.csv(file=anvfile, header=FALSE, nrows=1L, strip.white=TRUE, blank.lines.skip=FALSE)))
    data <- read.csv(file=anvfile, header=FALSE) # looser than the original format specification (?)

    if (length(n.variable) != 3 ||
        any(n.variable < 0) ||
        n.variable[1] != (n.variable[2] + n.variable[3]) ||
        length(n.level) != n.variable[1] ||
        any(n.level < 1) )
       stop("invalid input headers")
    if (balanced != all(sizes==sizes[1])) {
        if (balanced) stop("balanced but inconsistent cell sizes")
        else stop("unbalanced but consistent cell sizes")
    }

    # not check length(var_names) == n.variable[1] but trim var_names
    var_names <- var_names[1:n.variable[1]]

    names(n.variable) <- c("Total", "Between", "Within")
    names(n.level) <- var_names

    if (print.input) {
        print( list(description = descript, n.variable = n.variable, n.level = n.level, balanced = balanced,
            cell.size = sizes, data = data) )
    }

    # make a data frame
    data_anv4 <- data.frame(as.vector(t(as.matrix(data))))
    colnames(data_anv4)[1] <- outcome.label

    n.subj <- sum(sizes)
    is.between <- c(rep(TRUE, n.variable["Between"]), rep(FALSE, n.variable["Within"]))
    within_lv_prod <- prod(n.level[!is.between])

    if (prod(n.level[is.between]) != length(sizes)) stop("number of cells != number of between-subject conditions")
    if (within_lv_prod * n.subj != nrow(data_anv4)) stop("expected data size != number of observations")

    cross <- function(levs, r=1)
        sapply(1:length(levs), function(i)rep(rep(1:levs[i], each=prod(levs[-(1:i)]) ), prod(levs[0:(i-1)])*r))

    # between-factors design
    if (n.variable["Between"]) {
        des_mat <- matrix(unlist(mapply(rep, as.data.frame(t(cross(n.level[is.between]))), sizes*within_lv_prod)),
            ncol=n.variable["Between"], byrow=TRUE)
        colnames(des_mat) <- var_names[is.between]
        data_anv4 <- cbind(data_anv4, lapply(as.data.frame(des_mat), as.factor))
    }

    # within-factors design
    if (n.variable["Within"]) {
        des_mat <- cross(n.level[!is.between], n.subj)
        colnames(des_mat) <- var_names[!is.between]
        data_anv4 <- cbind(data_anv4, lapply(as.data.frame(des_mat), as.factor))
        data_anv4[block.label] <- gl(n.subj, within_lv_prod)
    }

    # check the number of variables
    if (ncol(data_anv4) < 2 || ncol(data_anv4) != (n.variable["Total"] + ifelse(n.variable["Within"], 2, 1)))
        stop("unexpected number of variables")

    # set class and attributes
    class(data_anv4) <- c("anv4", class(data_anv4))
    attr(data_anv4, "description") <- descript
    attr(data_anv4[[1]], "variable") <- "outcome"
    if (n.variable["Within"])
        attr(data_anv4[[ncol(data_anv4)]], "variable") <- "block"
    for (i in 1:n.variable["Total"])
        attr(data_anv4[[i+1]], "variable") <- if (i <= n.variable["Between"]) "between" else "within"

    if (print.input) return(invisible(data_anv4))
    return(data_anv4)
}

# Creates an 'anv4' data frame.
as.anv4 <- function(x, outcome=NULL, between.factors=NULL, block.factor=NULL, within.factors=NULL, description=NULL)
{
    match.variable <- function(datf, attrib.value) {
        (1:ncol(datf))[sapply(datf, function(i)!is.null(attr(i, "variable"))&&attr(i, "variable")==attrib.value)]
    }

    sub.1n <- function(x, f, vtype)
    {
        i <- match.variable(x, vtype)
        if (!is.null(f) || length(i) == 0) {
            v <- NULL
            if (is.list(f)) {
                if (length(f) == 1 && is.numeric(f[[1]])) {
                    v <- f
                }
            }
            else if (is.vector(f)) {
                if (is.character(f)) {
                    if (length(f) == 1)
                        v <- x[f]
                }
                else if (is.numeric(f)) {
                    if (length(f) == 1) {
                        if (f[1] > 0)
                            v <- x[f]
                    } else {
                        v <- list(f)
                    }
                }
                else if (is.logical(f)) {
                    f <- (1:ncol(x))[rep(f, ncol(x))]
                    if (length(f) == 1)
                        v <- x[f]
                }
            }

            if (!is.null(v)) {
                attr(v[[1]], "variable") <- vtype
                return(v)
            }
        }
        else if (length(i) == 1) {
            return(x[i])
        }
        return(NULL)
    }
    
    sub.1f <- function(x, f, vtype)
    {
        i <- match.variable(x, vtype)
        if (!is.null(f) || length(i) == 0) {
            v <- NULL
            if (is.list(f)) {
                if (length(f) == 1 && is.factor(f[[1]]))
                    v <- f
            }
            else if (is.factor(f)) {
                v <- list(f)
            }
            else if (is.vector(f)) {
                if (is.character(f)) {
                    if (length(f) == 1)
                        v <- x[f]
                }
                else if (is.numeric(f)) {
                    if (length(f) == 1)
                        if (f[1] > 0)
                            v <- x[f]
                }
                else if (is.logical(f)) {
                    f <- (1:ncol(x))[rep(f, ncol(x))]
                    if (length(f) == 1)
                        v <- x[f]
                }
            }
            if (!is.null(v)) {
                attr(v[[1]], "variable") <- vtype
                return(v)
            }
        }
        else if (length(i) == 1) {
            return(x[i])
        }
        return(NULL)
    }

    sub.m <- function(x, f, vtype)
    {
        i <- match.variable(x, vtype)
        if (!is.null(f) || length(i) == 0) {
            v <- NULL
            if (is.list(f)) {
                if (all(lapply(f, is.factor)))
                    v <- f
            }
            else if (is.factor(f)) {
                v <- list(f)
            }
            else if (is.vector(f)) {
                if (ncol(x[f]) >= length(f))
                    v <- lapply(x[f], as.factor)
            }
            if (!is.null(v)) {
                v <- lapply(v, function(i){attr(i, "variable") <- vtype;i})
                return(v)
            }
        }
        else {
            return(x[i])
        }
        return(NULL)
    }
    
    if (!is.data.frame(x)) {
        x <- as.data.frame(x)
    }
    
    outcome <- sub.1n(x, outcome, "outcome")
    if (is.null(outcome)) stop("no outcome variable is specified.")
    if (is.null(names(outcome))) names(outcome) <- "Outcome"

    between.factors <- sub.m(x, between.factors, "between")

    within.factors <- sub.m(x, within.factors, "within")

    block.factor <- sub.1f(x, block.factor, "block")
    if (!is.null(within.factors) && is.null(block.factor))
        stop("within-block variables require a block variable.")
    if (!is.null(block.factor) && is.null(names(block.factor))) names(block.factor) <- "Block"

    anv <- data.frame(outcome)
    if (!is.null(between.factors)) anv <- cbind(anv, between.factors)
    if (!is.null(within.factors)) anv <- cbind(anv, within.factors)
    if (!is.null(block.factor)) anv <- cbind(anv, block.factor)

    # set class and attributes
    class(anv) <- c("anv4", class(anv))
    attr(anv, "description") <- ifelse(!is.null(description), as.character(description), "")

    return (anv)
}

# Prints its required argument x (an 'anv4' object or a data frame) to a file or connection in ANV format.
write.anv <- function(x, file, description=NULL)
{
    if (!inherits(x, "anv4")) {
        stop("invalid class of argument 'x'")
    }

    match.variable <- function(datf, attrib.value) {
        (1:ncol(datf))[sapply(datf, function(i)!is.null(attr(i, "variable"))&&attr(i, "variable")==attrib.value)]
    }

    outcome.index <- match.variable(x, "outcome")
    block.index   <- match.variable(x, "block")
    between.index <- match.variable(x, "between")
    within.index  <- match.variable(x, "within")
    if (length(outcome.index) != 1) stop("not found just 1 outcome variable")
    if (length(within.index) && length(block.index) != 1) stop("not found just 1 block variable")
    if (length(block.index) > 1) stop("found needless block variables")
    if (length(c(between.index, within.index)) == 0) stop("no substantive factor variable")
    if (length(c(between.index, within.index)) > 4) stop("limited to 4 explanatory variables or less")

    if (is.null(description))
        description <- attr(x, "description")
    description <- as.character(description)
    varnames <- variable.names(x)[c(between.index, within.index)]
    varnames <- c(varnames, rep("", 4-length(varnames)))
    n.variable <- c(length(c(between.index, within.index)), length(between.index), length(within.index))
    n.level <- sapply(x[c(between.index, within.index)], nlevels)
    if (length(within.index)) {
        # check that any block is assigned to an unique combination of levels of between-block factors.
        if (!all(tapply(rep(1, nrow(x)), x[c(within.index, block.index)], length) == 1))
            stop("invalid between-block design")
    }
    cell.sizes <- as.vector(tapply(rep(1, nrow(x)), x[rev(between.index)], length))
    repeated <- prod(n.level[c(rep(FALSE, length(between.index)), rep(TRUE, length(within.index)))])
    cell.sizes <- cell.sizes / repeated
    if (repeated > 1) {
        # check that all of the combinations of within-block levels has the same times of between-block levels.
        if (!all( rep(cell.sizes, each=repeated) == as.vector(tapply(rep(1, nrow(x)), x[rev(c(between.index, within.index))], length)) ))
            stop("invalid within-block design")
    }
    balanced <- ifelse(all(cell.sizes == cell.sizes[1]), "1", "0")

    if (file == "")
        file <- stdout()
    else if (is.character(file)) {
        file <- file(file, "w")
        on.exit(close(file))
    }
    if (!inherits(file, "connection"))
        stop("'file' must be a character string or connection")

    writeLines(strtrim(description, 126), file) # 126 chars ANV format limitation
    for (v in varnames) writeLines(strtrim(v, 20), file) # 20 chars ANV format limitation
    writeLines(paste(as.character(n.variable), collapse=","), file)
    writeLines(paste(as.character(n.level), collapse=","), file)
    writeLines(balanced, file)
    writeLines(paste(as.character(cell.sizes), collapse=","), file)

    # to avoid using e_v_a_l_()
    idx <- c(between.index, block.index, within.index)
    ord <- switch(length(idx),
        order(x[[idx[1]]]),
        order(x[[idx[1]]], x[[idx[2]]]),
        order(x[[idx[1]]], x[[idx[2]]], x[[idx[3]]]),
        order(x[[idx[1]]], x[[idx[2]]], x[[idx[3]]], x[[idx[4]]]),
        order(x[[idx[1]]], x[[idx[2]]], x[[idx[3]]], x[[idx[4]]], x[[idx[5]]]) )
    x <- x[ord,]
    y <- matrix(x[[outcome.index]], ncol=repeated, byrow=TRUE)
    mode(y) <- "character"
    write.table(y, file=file, append=FALSE, quote=FALSE, sep=",", na="", row.names=FALSE, col.names=FALSE)
}

# Fits an ANOVA model by some ways to the data included in the 'anv4' object required.
anv4.fit <- function(data)
{
    if (!inherits(data, "anv4"))
        stop("invalid class of argument 'data'")

    match.variable <- function(datf, attrib.value) {
        (1:ncol(datf))[sapply(datf, function(i){ !is.null(attr(i, "variable")) && attr(i, "variable")==attrib.value })]
    }

    outcome.index <- match.variable(data, "outcome")
    block.index   <- match.variable(data, "block")
    between.index <- match.variable(data, "between")
    within.index  <- match.variable(data, "within")
    if (length(outcome.index) != 1) stop("not found just 1 outcome variable")
    if (length(within.index) && length(block.index) != 1) stop("not found just 1 block variable")
    if (length(c(between.index, within.index)) == 0) stop("no substantive factor variable")
    varnames <- variable.names(data)

    # make a formula
    model <- paste(varnames[outcome.index], "~", paste(varnames[c(between.index, within.index)], collapse="*"))
    if (length(within.index)) {
        model.within.block <- paste( paste(c(varnames[block.index], varnames[between.index]), collapse=":"),
            "/(", paste(varnames[within.index], collapse="*"), ")")
        model.error <- paste("+ Error(", model.within.block, ")")
        model.fixed.block <- paste( "+", model.within.block, "-",
            paste(varnames[c(block.index, between.index, within.index)], collapse=":"))
    } else {
        model.error <- ""
        model.fixed.block <- ""
    }

    # execute
    old.opt <- options(contrasts=c("contr.helmert", "contr.poly"))
    on.exit(options(old.opt))
    ret_list <- list()
    suppressWarnings( ret_list$aov <- aov(formula=as.formula(paste(model, model.error)), data=data) ) # singular due to no replication
    ret_list$lm <- lm(formula=as.formula(paste(model, model.fixed.block)), data=data)
    if (length(within.index)) {
        if (library("nlme", logical.return=TRUE)) {
            ret_list$lme <- lme(fixed=as.formula(model), random=as.formula(paste("~ 1 |", varnames[block.index])), data=data)
        }
    }

    class(ret_list) <- c("anv4.fit", class(ret_list))
    return(ret_list)

# Then
#   summary(aov.result) # Type I SS
#   model.tables(aov.result, "means")
#   TukeyHSD(aov.result, ...)
#   library(car)
#   Anova(lm.result, type="II")
#   Anova(lm.result, type="III")
#   anova(lme.result)
# etc.
}

print.anv4.fit <- function(x)
{
    print(x$aov)
}

anova.anv4.fit <- function(x)
{
    s <- list(aov = summary(x$aov))
    if (!is.null(x$lm)) s <- c(s, list(lm = anova(x$lm)))
    if (!is.null(x$lme)) s <- c(s, list(lme = anova(x$lme)))
    s
}