## This file is part of MLPY.
## Diagonal Linear Discriminant Analysis.

## This is an implementation of Diagonal Linear Discriminant Analysis described in:
## 'Block Diagonal Linear Discriminant Analysis With Sequential Embedded Feature Selection'
## Roger Pique'-Regi'
    
## This code is written by Roberto Visintainer, <visintainer@fbk.eu>
## (C) 2008 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Public API of this module: ``from <module> import *`` exports only the
# Dlda class; the helper functions defined below are not re-exported.
__all__ = ['Dlda']

from numpy import *
from numpy.linalg import inv, pinv, LinAlgError

def wmw_auc(y, r):
    """
    Compute the AUC by using the Wilcoxon-Mann-Whitney formula.

    :Parameters:
      y : 1d ndarray integer (-1 or 1)
        true classes
      r : 1d ndarray float
        real-valued scores, one per sample

    :Returns:
      auc : float
        fraction of (positive, negative) sample pairs in which the
        positive sample has a strictly larger score; tied pairs
        contribute nothing

    :Raises:
      ValueError
        if y and r have different length or y holds more than two
        distinct values
      ZeroDivisionError
        if y contains no sample labelled 1 or no sample labelled -1
    """

    if y.shape[0] != r.shape[0]:
        raise ValueError("y and r have different length")

    if unique(y).shape[0] > 2:
        raise ValueError("wmw_auc() works only for two-classes")

    pos = where(y == 1)[0]
    neg = where(y == -1)[0]

    # margins[a, b] = score of a-th positive minus score of b-th negative
    margins = r[pos][:, None] - r[neg][None, :]
    wins = (margins > 0.0).sum()

    # plain Python floats so that an empty class raises ZeroDivisionError
    return float(wins) / float(pos.shape[0] * neg.shape[0])
            
def mcc(y, p):
    """
    Compute the Matthews Correlation Coefficient (MCC).

    :Parameters:
      y : 1d ndarray integer (-1 or 1)
        true classes
      p : 1d ndarray integer (-1 or 1)
        predicted classes

    :Returns:
      mcc : float
        (tp*tn - fp*fn) / sqrt((tp+fn)*(tp+fp)*(tn+fn)*(tn+fp)),
        or 0.0 when the denominator is zero

    :Raises:
      ValueError
        if y and p have different length or either of them holds more
        than two distinct values
    """

    if y.shape[0] != p.shape[0]:
        raise ValueError("y and p have different length")

    if unique(y).shape[0] > 2 or unique(p).shape[0] > 2:
        raise ValueError("mcc() works only for two-classes")

    is_pos, is_neg = (y == 1), (y == -1)
    said_pos, said_neg = (p == 1), (p == -1)

    # confusion-matrix cells as plain Python ints
    tp = int((is_pos & said_pos).sum())
    tn = int((is_neg & said_neg).sum())
    fp = int((said_pos & ~is_pos).sum())
    fn = int((said_neg & ~is_neg).sum())

    den = sqrt((tp+fn)*(tp+fp)*(tn+fn)*(tn+fp))
    if den == 0.0:
        return 0.0

    return ((tp*tn)-(fp*fn)) / den



def dot3(a1, M, a2):
    """
    Compute a1 * M * a2T.

    The product dot(a1, M) is formed first and then combined with a2
    through inner().
    """

    return inner(dot(a1, M), a2)
    

class Dlda:
    """
    Diagonal Linear Discriminant Analysis.

    Features are added one at a time (forward selection); after each
    addition two candidate masks for the covariance matrix are compared
    with a leave-one-out evaluation and the one with the higher MCC is
    kept.

    Example:
    
    >>> from numpy import *
    >>> from mlpy import *
    >>> xtr = array([[1.1, 2.4, 3.1, 1.0],  # first sample
    ...              [1.2, 2.3, 3.0, 2.0],  # second sample
    ...              [1.3, 2.2, 3.5, 1.0],  # third sample
    ...              [1.4, 2.1, 3.2, 2.0]]) # fourth sample
    >>> ytr = array([1, -1, 1, -1])         # classes
    >>> mydlda = Dlda(nf = 2)                     # initialize dlda class
    >>> mydlda.compute(xtr, ytr)        # compute dlda
    1
    >>> mydlda.predict(xtr)             # predict dlda model on training data
    array([ 1, -1,  1, -1])
    >>> xts = array([4.0, 5.0, 6.0, 7.0])   # test point
    >>> mydlda.predict(xts)                 # predict dlda model on test point
    -1
    >>> mydlda.realpred                     # real-valued prediction
    -21.999999999999954
    >>> mydlda.weights(xtr, ytr)        # compute weights on training data
    array([  2.13162821e-14,   0.00000000e+00,   0.00000000e+00,   4.00000000e+00])
    """
   
    def __init__(self, nf = 0, tol = 10, overview = False, bal = False):
        """
        Initialize Dlda class.
        
        :Parameters:
          nf : int (1 <= nf <= #features)
            the number of the best features that you want to use in
            the model. If nf = 0 the system stops at a number of features
            corresponding to a peak of accuracy

          tol : int
            in case of nf = 0 it's the number of steps
            of classification to be calculated after the peak to avoid a
            local maximum

          overview : bool
            set True to print informations about the
            accuracy of the classifier at every step of the compute

          bal : bool
            set True if it's reasonable to consider the
            unbalancement of the test set similar to the one of the
            training set (the value is stored; no method of this class
            reads it)

        :Raises:
          ValueError
            if nf is negative
        """

        if nf < 0:
            raise ValueError("nf value must be >= 1 or 0")
        
        self.__nf       = nf
        self.__tol      = tol       
        self.__computed = False
        self.__overview = overview
        self.__bal      = bal


    def __compute_d(self, j):
        """Compute the distance between the centroids of the
        distribution of the two classes of data, restricted to the
        already selected features plus candidate feature j.
        """
        
        feats = self.__A + [j]
        X = self.__x[:, feats]
        medpos = mean(X[self.__y == 1], axis = 0)
        medneg = mean(X[self.__y == -1], axis = 0)
        return medpos - medneg

        
    def __compute_sigma(self, j):
        """ Compute a metric in order to choose the 'best' features
        between the ones left from the previous passages: the sum of
        the per-class standard deviations of feature j.
        
        See Eq.7 Pg.3
        """ 
           
        # The column is 1-D, so it is indexed with a single boolean mask
        # (a second index would raise IndexError).
        column = self.__x[:, j]
        pos = column[self.__y == 1]
        neg = column[self.__y == -1]
        return sqrt(var(pos, axis = 0)) + sqrt(var(neg, axis = 0))


    def __compute_b(self):
        """ Compute of the parameter 'b' offset of the classification
        hyperplan.

        Adaptive offset (b) based on MCC value of the prediction is computed:
        every midpoint between two consecutive sorted training scores is
        tried and the first one reaching the highest MCC is kept.
        Also stores the raw training scores in self.realpred.
        """
        best_mcc = -1
        best_b = 0
        scores = self.realpred = dot(self.__x[:, self.__A], self.__WA)
        
        labels = zeros_like(scores)
        ordered = sort(scores)
        for i in range(len(ordered) - 1):
            thr = 0.5 * (ordered[i] + ordered[i+1])
            labels[where(scores < thr)] = -1
            labels[where(scores >= thr)] = 1
            cur = mcc(self.__y, labels)
            if cur > best_mcc:
                best_mcc = cur
                best_b = thr
                
        self.__b = best_b


    def __report(self, acc, mcc_val, auc_val):
        """Print the leave-one-out figures of the current step."""

        print("With %s features the accuracy on training data is: %s %%, "
              "the MCC value is %s and auc = %s"
              % (len(self.__A), acc * 100, mcc_val, auc_val))

                  
    def __choose_model(self):
        """With a l.o.o. classification verify which model gives the
        best accuracy.

        Two masks for the enlarged covariance matrix are built: one in
        which the newest feature joins the current block and one in
        which it starts a block of its own. The mask with the higher
        MCC is stored in Kmask.

        Returns 0 (leaving the state untouched) if the evaluation of
        either mask fails; returns None otherwise.
        """
            
        blk = len(self.__A) - self.__m_code
        tmp = ones((self.__K.shape[0], self.__K.shape[1]), dtype = int8)
        tmp[:-1, :-1] = self.__Kmask
        # decouple the new feature from everything outside the current block
        tmp[-1, :-blk] = 0
        tmp[:-blk, -1] = 0
        mask_sameblock = tmp.copy()
        # decouple the new feature from every other feature
        tmp[-1, :-1] = 0
        tmp[:-1, -1] = 0
        mask_otherblock = tmp.copy()

        try:
            acc_ob, mcc_ob, auc_ob = self.__check_model(mask_otherblock)
            acc_sb, mcc_sb, auc_sb = self.__check_model(mask_sameblock)
        except Exception:
            # deliberate best effort: a failed evaluation skips this step
            return 0
        
        if mcc_ob > mcc_sb:
            self.__Kmask = mask_otherblock
            self.__checkstop(mcc_ob)
            self.__m_code = len(self.__A) - 1
            if self.__overview:
                self.__report(acc_ob, mcc_ob, auc_ob)
        else:
            self.__Kmask = mask_sameblock
            self.__checkstop(mcc_sb)
            if self.__overview:
                self.__report(acc_sb, mcc_sb, auc_sb)

    
    def __check_model(self, mask):
        """Given the next best feature calculates which covariance
        matrix model is the best.

        Runs a leave-one-out loop over the training samples using the
        covariance matrix multiplied element-wise by mask and returns
        the tuple (accuracy, MCC, AUC). Any error raised while building
        the masked covariance propagates to the caller.
        
        See Table1 Pg.2
        """
        n = self.__x.shape[0]
        p_mcc = zeros(n)
        rp_auc = zeros(n)
        n_right = 0
        xf = self.__x[:, self.__A]
        for i in range(n):
            keep = [k for k in range(n) if k != i]
            xsf = xf[keep, :]
            ys = self.__y[keep]
            ytest = self.__y[i]

            K = cov(xsf.transpose(), bias = 1) * mask
            
            medpos = mean(xsf[ys == 1], axis = 0)
            medneg = mean(xsf[ys == -1], axis = 0)
            d = medpos - medneg
            try: 
                w = dot(inv(K), d)
            except LinAlgError:
                w = dot(pinv(K), d)
            
            pred = dot(self.__x[i, self.__A], w) - self.__b    
            rp_auc[i] = pred
            
            # a NaN score leaves the label at 0
            if pred >= 0.0:
                p_mcc[i] = 1
            elif pred < 0.0:
                p_mcc[i] = -1
            
            if (pred >= 0 and ytest == 1) or (pred < 0 and ytest == -1):
                n_right += 1

        acc = n_right * 1.0 / n
        mcc_res = mcc(self.__y, p_mcc)
        auc_res = wmw_auc(self.__y, rp_auc)
        return acc, mcc_res, auc_res


    def __addfeat(self, BF):
        """Adds the chosen feature to the final list of features 'A'
        and deletes it from 'AC'. Update correlation matrix 'K',
        distance 'd' and weights 'WA', then recompute the offset 'b'.
        """
        
        if self.__K is None:
            # first feature: K is the 1x1 variance of that feature
            self.__K = array([[cov(self.__x[:, BF], bias = 1)]])
            self.__d = self.__compute_d(BF)
            try:
                self.__WA = dot(inv(self.__K), self.__d)
            except LinAlgError:
                self.__WA = dot(pinv(self.__K), self.__d)
        else:
            self.__WA, self.__d, self.__K = self.__compute_WA(BF)
            
        self.__A.append(BF)
        self.__AC.remove(BF)
        self.__compute_b()


    def __update_K(self, j):
        """Return the covariance matrix of the selected features plus
        candidate feature j.
        """
        
        X = self.__x[:, self.__A + [j]]
        return cov(X.transpose(), bias = 1)


    def __compute_WA(self, j):
        """Compute the vector of weights at every step of the cycle
        (the number of weights increases with the number of features
        considered).

        Returns the list [WA, d, K].
        
        See Eq.6 Pg.3
        """
        
        d = self.__compute_d(j)
        K = self.__update_K(j)
        ### NB: adding a new feature we don't have info about the mask so we use the whole cov matrix K
        try:
            WA = dot(inv(K), d)
        except LinAlgError:
            WA = dot(pinv(K), d)
        return [WA, d, K]


    def __compute_j(self, j):
        """
        Compute a metric in order to choose the 'best' features between
        the ones left from the previous passages
        See Eq.7 Pg.3
        """
        
        WA, d, K = self.__compute_WA(j)
        num = inner(d.transpose(), WA)**2.0
        den = dot3(WA, K, WA)
        return (num / den)


    def __next_feature(self):
        """Return the remaining feature with the highest __compute_j
        score (the last one in case of ties), or None if no feature is
        left.
        """

        bestval = None
        bestfeat = None
        for j in self.__AC:
            val_j = self.__compute_j(j)
            if bestval is None or val_j >= bestval:
                bestval = val_j
                bestfeat = j
        return bestfeat


    def __checkstop(self, M):
        """In case of 'auto stop mode' (nf = 0). Counts the number
        of steps in which the model doesn't exceeds the peak value,
        resets the peak value and count otherwise.

        When M exceeds the peak, the current weights (computed with the
        masked covariance), offset and feature list are stored. Returns
        0 without storing anything if the masked covariance cannot be
        inverted.
        """
        
        if M > (self.__peak + 1e-3): # Don't update under 1e-3 over the peak
            try:
                self.__WA_stored = dot(inv(self.__K*self.__Kmask), self.__d)
            except LinAlgError:
                self.__SingularMatrix = True
                return 0
            self.__b_stored = self.__b
            self.__A_stored = self.__A[:]
            self.__cont = 0
            self.__peak = M
            
        else:
            self.__cont += 1
    
        
    def __select_features(self):
        """In a cycle selects the best features and the best model to use.
        See Algorithm 1 Pg.3
        """

        if len(self.__A) == 0: # Check it's really the first step (for landscape)
            self.__b = 0
            first_val = 0
            first_feat = None
            for j in self.__AC:
                dist = sum(abs(self.__compute_d(j))) ## Distance L1
                val = dist / self.__compute_sigma(j) * 1.0
            
                if val > first_val:
                    first_val = val
                    first_feat = j

            if first_feat is None:
                raise ValueError("no feature separates the two classes")
                
            self.__addfeat(first_feat)
            
        # IF N OF FEATURES IS DEFINED
        if self.__nf > 0:
            while (len(self.__A) < self.__nf):
                bestfeat = self.__next_feature()
                if bestfeat is None:  # no candidate feature left
                    return 0
                self.__addfeat(bestfeat)
                self.__choose_model()

            # final weights use the masked covariance
            try:
                self.__WA = dot(inv(self.__K*self.__Kmask), self.__d)
            except LinAlgError:
                self.__WA = dot(pinv(self.__K*self.__Kmask), self.__d)

            if self.__overview:
                print("Weights for  %s features:  %s" % (self.__nf, self.__WA))
                print("This model is going to use %s features"
                      % (len(self.__A),))

        # IF USE AUTOSTOP 
        if self.__nf == 0:
            while ((len(self.__AC) > 0) and (self.__cont < self.__tol)):
                bestfeat = self.__next_feature()
                if bestfeat is None:  # no candidate feature left
                    return 0
                self.__addfeat(bestfeat)
                self.__choose_model()
            
            # Restore the model stored at the peak. If no step ever
            # exceeded the peak nothing was stored: keep the current
            # model instead of overwriting it with None.
            if self.__A_stored is not None:
                self.__WA = self.__WA_stored
                self.__b = self.__b_stored
                self.__A = self.__A_stored
            
            if self.__overview:
                print("Weights for  %s features:  %s"
                      % (len(self.__A), self.__WA))
                print("This model is going to use %s features"
                      % (len(self.__A),))

    def compute (self, x, y, mf = 0):
        """
        Compute Dlda model.

        :Parameters:  
          x : 2d ndarray float (samples x feats)
            training data
          y : 1d ndarray integer (-1 or 1)
            classes
          mf : int
            number of classification steps to be calculated
            more on a model already computed. It is honoured only when
            nf > 0 and a model has already been computed; in that case
            the x and y passed here are not stored and the previous
            training data are reused

        :Returns:
          1

        :Raises:
          ValueError
            if y does not contain exactly two classes, if nf exceeds
            the number of features, or if no feature separates the
            classes at the first step
          LinAlgError
            if x is singular matrix
        """
        
        if (self.__nf == 0) or (self.__computed == False):
            mf = 0
        
        if mf == 0:
            self.__classes = unique(y)
            if self.__classes.shape[0] != 2:
                raise ValueError("DLDA works only for two-classes problems")

            if x.shape[1] < self.__nf:
                raise ValueError("nf value must be <= total number of features")

            cl0 = where(y   == self.__classes[0])[0]
            cl1 = where(y   == self.__classes[1])[0]             
            self.__ncl0     = cl0.shape[0]
            self.__ncl1     = cl1.shape[0]
            
            self.__piN      = self.__ncl0 * 1.0 / x.shape[0] * 1.0
            self.__piP      = self.__ncl1 * 1.0 / x.shape[0] * 1.0
            
            # a real list: features are removed from it as they are selected
            self.__AC       = list(range(x.shape[1]))
            self.__x        = x
            self.__y        = y
            
            self.__b        = None
            self.__d        = None
            self.__K        = None
            self.__Kmask    = ones((1,1))
            self.__A        = []
            self.__m_code   = 0
            self.__WA       = None
            self.__peak     = 0
            self.__cont     = 0
            self.__WA_stored= None
            self.__b_stored = None
            self.__A_stored  = None
        else:
            self.__nf += mf
            
            
        self.__select_features()

        self.__computed = True
        
        return 1
        
    def predict (self, p):
        """
        Predict Dlda model on test point(s).

        :Parameters:
          p : 1d or 2d ndarray float (sample(s) x feats)
            test sample(s)

        :Returns:
          cl : integer or 1d numpy array integer
            class(es) predicted: 1 for a positive score, -1 for a
            negative score, 0 for a score of exactly zero

        :Attributes:
          self.realpred : float or 1d numpy array float
            real valued prediction

        :Raises:
          RuntimeError
            if the model has not been computed yet
          ValueError
            if p is neither 1d nor 2d
        
        """
        if self.__computed == False:
            # RuntimeError derives from StandardError on Python 2
            raise RuntimeError("Dlda model not computed yet")
        
        if p.ndim == 2:
            self.realpred = dot(p[:, self.__A], self.__WA) - self.__b
            pred = zeros(self.realpred.shape[0], dtype=int)
            pred[where(self.realpred > 0.0)] = 1
            pred[where(self.realpred < 0.0)] = -1
                
        elif p.ndim == 1:
            pred = 0
            # single sample: select the features with one index only
            self.realpred = dot(p[self.__A], self.__WA) - self.__b

            if self.realpred > 0.0:
                pred = 1

            elif self.realpred < 0.0:
                pred = -1

        else:
            raise ValueError("p must be a 1d or 2d ndarray")

        return pred


    def weights (self, x, y):
        """
        Return feature weights.

        The model is recomputed from scratch on (x, y) before the
        weights are read.

        :Parameters:
          x : 2d ndarray float (samples x feats)
            training data
          y : 1d ndarray integer (-1 or 1)
            classes
        
        :Returns:
          fw :  1d ndarray float
            absolute feature weights, they are going to be
            >= 0 for the features chosen for the classification and = 0 for
            all the others
        """
    
        self.compute(x, y, 0)

        weights = zeros(x.shape[1])
        for feat, w in zip(self.__A, self.__WA):
            weights[feat] = w

        if self.__overview:
            print("The positions of the best features are: %s" % (self.__A,))
        return abs(weights)
    
