# $Id: peakfit.s 6908 2009-03-10 16:46:43Z eckel@MFAD.MFROOT.ORG $
#
# Fit the ratio data for all the spectra of a given species
#
# constraint=T: constrain the parameters to be non-negative
# common=T: use a common overall intercept per specie; otherwise, 1 per spectra
# var = variance weighting for the data
# align = number of alternate alignments, +- the preferred one, to try,
#    for assignment of the neutral mass
# iter = iterations for the final bilinear fit
# p = purity of the water
# slim = minimum allowed s value for the solution (setting the default >0
#          avoids a zerodivide when there is only 1 peak).
#
peakfit <- function(data, var=c(5000, .05), constraint=T, common=T, 
                    align=2, iter=3, p=.9, slim=0.70){
    Call <- match.call()
    if (!is.data.frame(data)) {
        indat <- data
        data <- data$data
        }
    else indat <- NULL

    if (iter <1) stop("Invalid iterations")
    if (align) iso <- peakfit.align(data)
    else iso <- data$iso
    
    clust.list <- sort(unique(data$cluster))
    cltemp <- match(data$cluster, clust.list)
    nclust <- max(cltemp)
    mass <- mean((data$m.z-1)*data$z - (iso-1))  #average monoistotopic mass
                        
    # cmat allows us to fit in terms of beta instead of delta,
    #  which gives stronger constraints
    # Note that cmat here is equivalent to D in Eckel-Passow et al.
    # dmat fits in terms of theta, but for a fixed s
    # cmat and dmat here refer to that as defined in Terrys technical report
    cmat <- matrix(c(1,0,0,  1-p, p, 0, (1-p)^2, 2*p*(1-p), p^2), ncol=3)
    pc <- slim/p       # Binding efficiency of the peptide
    dmat <- matrix(c(1,0,0,  (1-pc)^2, 2*pc*(1-pc), pc^2), ncol=2)

    # Try out the various alignments
    # Doug: the next set of code fits alignments based on species mass
    # First find the mass of the highest peak. In case of ties, use the
    # min function to pick a reasonable mass

    max.mass <- min(floor((data$m.z*data$z)[which(data$y==max(data$y))]))
     
    # How far away is the max from the monoisotopic mass?
    # Use the pdist function to calculate the isotopic distribution of a
    # species with a given mass using averagine
    
    temp.max <- pdist(max.mass)

    # Pick off the ranges of masses where the monoisotopic can be
    # and add a little wiggle room

    mono.range <- max(which(temp.max>max(temp.max)*.9))+4 #Wiggle room for an all 180 peak
    
    if (align)  states <- seq(-mono.range, mono.range)
    else states <- 0
    # Don't try a silly alignment that puts all of the data to the
    #   left of the neutral peak (it leads to singular fits of a constant)
    # In fact, force at least 1 peak to be to the right of "iso=1".
    if (max(states+2) >= max(iso)) 
        states <- states - (3 + max(states)-max(iso))

    gfit <- double(length(states))  #IInitializing the goodness of fit for each state

    # First pass: try all alignments
    # But, we need to keep the case weights fixed, so that the measure of
    #   goodness (weighted sum of errors) is comparable between alignments.
    # There are 4 cases below, so the code looks very complicated:
    #   do we constrain the coefs to be >0 (yes/no); do we have 1 intercept
    #   per species (common=T) or one per spectra.
    #   
    for (offset in states) {
        iso.temp <- iso - offset
        minstate <- min(c(1, iso.temp))  #minimum isotopic state for y
        maxstate <- max(c(iso.temp, 6))  #data extends to second 18O peak
        n.per.clust <- 1+ maxstate - minstate
        
        y <- matrix(0., nrow=n.per.clust, ncol=nclust)
        pnum <- 1 + iso.temp - minstate    #numbers might start at -1
        if (any(pnum > n.per.clust | pnum <1)) stop("numbering bug")

        y[cbind(pnum, cltemp)] <- data$y  #y matrix, with zeros for holes

        x1 <- pdist(mass+offset, isotope, .0001)  # real avergine convolution
        if (minstate <1)  x1 <- c(rep(0, 1-minstate), x1) #extra rows of zeros
                        
        if (length(x1) < n.per.clust) 
            x1 <- c(x1, rep(0., n.per.clust - length(x1)))
        else x1 <- x1[1:n.per.clust]

        # The above manipulations of x1 ensures that x and y will line up
        #  properly
        xmat <- cbind(rep(x1, nclust),
                      rep(c(0,0, x1[1:(n.per.clust-2)]), nclust),
                      rep(c(0,0,0,0, x1[1:(n.per.clust-4)]), nclust))
        xmat <- xmat %*% cmat  # change to beta parameterization (see paper)
        afac <- rep(1:nclust, each=n.per.clust)  # the x matrix for alpha
        yvar <- var[1] + var[2]* c(y)  
        alpha <- colMeans(y) # starting estimate for alphas

        # For the "find best alignment" iterations, we don't allow yvar to
        #  be reset.  Otherwise the comparative residuals would not be fair
        wt <- (1/yvar)/sum(1/yvar)
        
        for (i in 1:max(iter,1)) {
            tx <- xmat * alpha[afac]  #scale xmat by alpha
            if (common) {
                if (constraint) {
                    bfit <- nnls.fit2(cbind(1, tx), c(y), 
				 weight=sqrt(1/yvar))    #beta fits
                    # Verify that we obey the theta constraint
                    pc <- 2*bfit$coef[4]/(2*bfit$coef[4] + bfit$coef[3])
                    if (is.na(pc)) pc <- 0  # the all 16O case, beta3=beta4=0
                    if ((pc*p) < slim) {
                        # sc=pc*p is the incorporation rate
                        # Use the constraint specified in the call,
                        #   fit in terms of theta
                        # From Terrys technical report, we see that CD=S
                        tfit <- nnls.fit2(cbind(1, tx %*% dmat), c(y),
                                         weight=sqrt(1/yvar))
                        bfit$coef <- c(tfit$coef[1], dmat %*% tfit$coef[-1])
                        }
                    else {
                        theta2 <- bfit$coef[4]/pc^2
                        theta1 <- bfit$coef[2] - (1-pc)^2*theta2
                        if (theta1 <0) {
                            # use the value of pc that I found above
                            d2 <- cbind(c(1,0,0),
                                        c((1-pc)^2, 2*pc*(1-pc), pc^2))
                            tfit <- nnls.fit2(cbind(1, tx %*% d2), c(y),
                                         weight=sqrt(1/yvar))
                            bfit$coef <- c(tfit$coef[1], d2%*% tfit$coef[-1])
                            }
                        }
                    }
                # else if constraint=F then fit lm() instead of nnls.fit2
                else {
                    bfit <- lm(c(y) ~ tx, weight= 1/yvar) #fit the betas
                    }
                # predicted y, beyond intercept
                yhat <- matrix(xmat %*% bfit$coef[2:4], ncol=nclust)
                if (all(yhat<=0)) alpha <- colMeans(y) # punt - bad fit
                else alpha <- pmax(0, colSums((y-bfit$coef[1]) *yhat) / 
                                      colSums(yhat^2))
                }
            else {  #intercept per group
                if (constraint) stop("Program not finished")
                bfit <- lm(c(y) ~ tx + factor(afac)-1, weight= 1/yvar)
                yhat <- matrix(xmat %*% bfit$coef[2:4], ncol=nclust)
                temp <- (bfit$coef[-(1:3)])[afac]
                alpha <- pmax(0, colSums((y-temp) *yhat) / colSums(yhat^2))
                }
            }
        resid <- y- (yhat*alpha[afac] + bfit$coef[1])
        gfit[match(offset, states)] <- ifelse(all(yhat<=0),10^12,sqrt(mean(wt*resid^2)))
        }
    best.state <- min(states[which(gfit==min(gfit))])

    # Now fit it all again, allowing the variance to be updated
    #  but only for the best alignment
    iso <- iso - best.state
    minstate <- min(c(1, iso))  #minimum isotopic state for y
    maxstate <- max(c(iso, 6))  #data extends to second 18O peak
    n.per.clust <- 1+ maxstate - minstate

    y <- matrix(0., nrow=n.per.clust, ncol=nclust)
    pnum <- 1 + iso - minstate    #numbers might start at -1
    if (any(pnum > n.per.clust | pnum <1)) stop("numbering bug")

    y[cbind(pnum, cltemp)] <- data$y  #y matrix, with zeros for holes

    x1 <- pdist(mass+best.state, isotope, .0001)  # real avergine convolution
    if (minstate <1)  x1 <- c(rep(0, 1-minstate), x1) #extra rows of zeros
    
    if (length(x1) < n.per.clust) 
        x1 <- c(x1, rep(0., n.per.clust - length(x1)))
    else x1 <- x1[1:n.per.clust]

    # The above manipulations of x1 ensures that x and y will line up
    #  properly
    xmat <- cbind(rep(x1, nclust),
                  rep(c(0,0, x1[1:(n.per.clust-2)]), nclust),
                  rep(c(0,0,0,0, x1[1:(n.per.clust-4)]), nclust))
    xmat <- xmat %*% cmat  # change to beta parameterization (see paper)
    afac <- rep(1:nclust, each=n.per.clust)  # the x matrix for alpha

    yvar <- var[1] + var[2]* c(y)  
    alpha <- colMeans(y) # starting estimate for alphas
    for (i in 1:iter) {
        tx <- xmat * alpha[afac]  #scale xmat by alpha
        if (common) {
            if (constraint) {
                bfit <- nnls.fit2(cbind(1, tx), c(y), 
				 weight=sqrt(1/yvar))
                # Verify that we obey the theta constraint
                pc <- 2*bfit$coef[4]/(2*bfit$coef[4] + bfit$coef[3])
                if (is.na(pc)) pc <- 0   # beta3 = beta4 =0 case, all 16O
                if ((pc*p) < slim) {
                    # Use the constraint specified in the call
                    tfit <- nnls.fit2(cbind(1, tx %*% dmat), c(y),
                                     weight=sqrt(1/yvar))
                    bfit$coef <- c(tfit$coef[1], dmat %*%tfit$coef[-1])
                    }
                else {
                    theta2 <- bfit$coef[4]/pc^2
                    theta1 <- bfit$coef[2] - (1-pc)^2*theta2
                    if (theta1 <0) {
                        # use the value of pc I found
                        d2 <- cbind(c(1,0,0),
                                    c((1-pc)^2, 2*pc*(1-pc), pc^2))
                        tfit <- nnls.fit2(cbind(1, tx %*% d2), c(y),
                                         weight=sqrt(1/yvar))
                        bfit$coef <- c(tfit$coef[1], d2%*% tfit$coef[-1])
                        }
                    }
                }
            else {
                bfit <- lm(c(y) ~ tx, weight= 1/yvar) #fit the betas
                }
            # predicted y, beyond intercept
            yhat <- matrix(xmat %*% bfit$coef[2:4], ncol=nclust)
            yvar <- var[1] + (yhat+bfit$coef[1])*var[2]
            if (all(yhat<=0)) alpha <- colMeans(y) # punt - bad fit
            else alpha <- pmax(0, colSums((y-bfit$coef[1]) *yhat) / 
                               colSums(yhat^2))
            }
          # else if common=F then fit intercept per group
        else { 
            if (constraint) stop("Program not finished")
            bfit <- lm(c(y) ~ tx + factor(afac)-1, weight= 1/yvar)
            yvar <- var[1] + var[2]*predict(bfit)
            yhat <- matrix(xmat %*% bfit$coef[2:4], ncol=nclust)
            temp <- (bfit$coef[-(1:3)])[afac]
            alpha <- pmax(0, colSums((y-temp) *yhat) / colSums(yhat^2))
            }
        }

    # One last pass, with the current alpha
    #  (The above iteration gets beta, then alpha.  We want to finish with
    #   beta as the last step).
    tx <- xmat * alpha[afac]  #scale xmat, update with new alpha
    if (common) {
	if (constraint) {
	    bfit <- nnls.fit2(cbind(1, tx), c(y), 
				 weight=sqrt(1/yvar))
            # Verify that we obey the theta constraint
            pc <- 2*bfit$coef[4]/(2*bfit$coef[4] + bfit$coef[3])
            if (is.na(pc)) pc <- 0   # beta3 = beta4 =0 case, all 16O
            if ((pc*p) < slim) {
                # Use the constraint specified in the call
                tfit <- nnls.fit2(cbind(1, tx %*% dmat), c(y),
                                 weight=sqrt(1/yvar))
                bfit$coef <- c(tfit$coef[1], dmat %*%tfit$coef[-1])
                bfit$residuals <- tfit$residuals
                theta <- tfit$coef[-1]
                pc <- slim/p
                }
            else {
                theta2 <- bfit$coef[4]/pc^2
                theta1 <- bfit$coef[2] - (1-pc)^2*theta2
                if (theta1 <0) {
                    # use the value of pc I found above, fit theta
                    d2 <- cbind(c(1,0,0),
                                c((1-pc)^2, 2*pc*(1-pc), pc^2))
                    tfit <- nnls.fit2(cbind(1, tx %*% d2), c(y),
                                         weight=sqrt(1/yvar))
                    bfit$coef <- c(tfit$coef[1], d2%*% tfit$coef[-1])
                    theta <- tfit$coef[-1]
                    }
                else theta <- c(theta1, theta2)
                }

            # contrary to documentation, nnls.fit returns not the
            #  residuals, but weighted residuals. Unweight them.
            bfit$residuals <- bfit$residuals *sqrt(yvar) 
            }
	else
            bfit <- lm(c(y) ~ tx, weight= 1/yvar) #fit the betas
	}

    else
            bfit <- lm(c(y) ~ tx + factor(afac), weight= 1/yvar)
                      
    names(alpha) <- clust.list
    wt <- c(1/yvar)
    #
    # Normalize beta and alpha, so that sum(beta) =1
    #
    bscale <- sum(bfit$coef[-1])
    if (bscale >0) { #very rarely, the best fit is intercept only
        bfit$coef <- c(bfit$coef[1], bfit$coef[-1]/bscale)
        theta <- theta/bscale
        alpha <- alpha*bscale
        }

    # Return the original X matrix
    xmat <- cbind(x1,  c(0,0, x1[1:(n.per.clust-2)]),
                       c(0,0,0,0, x1[1:(n.per.clust-4)]))

    dimnames(xmat) <- list(NULL, c("delta1", "delta2", "delta3"))
    names(gfit) <- states
    out <- list(fit=bfit, alpha=alpha, clusters=clust.list, xmat=xmat,
                y=y, weight= wt/mean(wt), nmass=mass + best.state, best.state=best.state,
                gfit=gfit,
                iso = minstate + 0:(nrow(y) -1), p = p, residuals=bfit$residuals,
                pc=pc, theta=theta,
                call=Call)
    if (!is.null(indat)) out <- c(out, indat[-1])
    out$tx <- tx
    oldClass(out) <- 'peakfit'
    out
    }
        

        
    
    
    
