
Solving Bridge Regression Using Local Quadratic Approximation (LQA)


Bridge regression is a broad class of penalized regression methods that can be used in high-dimensional regression problems. It includes ridge regression (q=2) and the lasso (q=1) as special cases.
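Formally, for q>0 the bridge estimator minimizes a penalized least squares criterion:

$$
\hat{\beta} = \arg\min_{\beta}\ \|y - X\beta\|_2^2 + \lambda\sum_{j=1}^{p}|\beta_j|^q,
$$

where the tuning parameter λ controls the amount of shrinkage and q controls the shape of the penalty.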

More technical details can be found here. The R code below demonstrates:

  1. solving bridge regression using local quadratic approximation (LQA) and the Newton–Raphson algorithm (the update is sketched just below);
  2. a simulation that uses 50/50/200 observations (training/validation/testing), with the validation set used to select the tuning parameter.
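The idea of LQA is to replace the (possibly non-differentiable) penalty with a quadratic approximation around the current estimate $\beta^{(0)}$:

$$
|\beta_j|^q \approx |\beta_j^{(0)}|^q + \frac{q}{2}\,|\beta_j^{(0)}|^{q-2}\left(\beta_j^2 - (\beta_j^{(0)})^2\right),
$$

so that each Newton–Raphson step reduces to solving a ridge-type linear system:

$$
\beta^{(1)} = \left(X^TX + D\right)^{-1}X^Ty,
\qquad
D = \operatorname{diag}\!\left(\frac{\lambda\, q\, |\beta_j^{(0)}|^{q-2}}{2}\right).
$$

This is exactly the system solved inside the while loop in the code below; coefficients whose magnitude drops below the threshold eta are set to zero and removed from the design matrix before the next update.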
# The function to solve bridge regression over a grid of lambda values
library(glmnet)

bridge<-function(x, y, lambda, q=1, eta=0.001){
  # use ridge coefficients as starting values (one column per lambda)
  beta.start<-coef(glmnet(x, y, alpha=0, lambda=lambda, intercept=FALSE))
  beta.mat<-matrix(NA,ncol=ncol(beta.start),nrow=nrow(beta.start)-1)

  # take each lambda value in the grid
  for(i in 1:length(lambda)){
    # initial beta, without the intercept
    beta_prev<-as.vector(beta.start[-1,i])
    # initial convergence criterion
    converge<-10^10

    # iterate until convergence or too many iterations
    iteration<-0
    while(converge>eta && iteration<=100){
      iteration<-iteration+1
      # indices of coefficients that are effectively zero
      del<-which(abs(beta_prev)<eta)

      if(length(del)==length(beta_prev)){
        # all coefficients are small enough: set them to 0 and stop
        beta_prev[del]<-0
        converge<-0

      }else if(length(del)>0){
        # some (but not all) coefficients are removed: set them to 0
        beta_prev[del]<-0
        # drop the corresponding columns of the design matrix
        x.new<-x[,-del,drop=FALSE]

        # diagonal matrix of LQA penalty terms
        D<-diag(lambda[i]*q*(abs(beta_prev[-del])^(q-2))/2,
                nrow=length(beta_prev)-length(del))

        # Newton-Raphson (ridge-type) update for the remaining coefficients
        beta_curr<-beta_prev
        beta_curr[-del]<-solve(t(x.new)%*%x.new+D)%*%t(x.new)%*%y

        # new convergence value, then move to the next iteration
        converge<-sum((beta_curr-beta_prev)^2)
        beta_prev<-beta_curr

      }else{
        # no coefficients need to be removed
        D<-diag(lambda[i]*q*(abs(beta_prev)^(q-2))/2, nrow=length(beta_prev))
        beta_curr<-as.vector(solve(t(x)%*%x+D)%*%t(x)%*%y)

        # new convergence value, then move to the next iteration
        converge<-sum((beta_curr-beta_prev)^2)
        beta_prev<-beta_curr
      }
    }

    beta.mat[,i]<-beta_prev
  }
  colnames(beta.mat)<-lambda
  return(beta.mat)
}

# The function to find the coefficient vector (i.e., the lambda value)
# that minimizes MSE on the validation set
bridge.opt<-function(coef.matrix, newx, newy){
  # calculate the validation MSE for each column (each lambda)
  mse.validate<-NULL
  for(i in 1:ncol(coef.matrix)){
    mse.validate<-c(mse.validate, mean((newx%*%coef.matrix[,i]-newy)^2))
  }
  # pick the coefficient vector with the smallest validation MSE
  coef.opt<-coef.matrix[,which.min(mse.validate)]
  return(coef.opt)
}
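
As a quick standalone illustration (not part of the original simulation), the two functions can be chained as follows; the data sizes, seed, and lambda grid here are arbitrary choices:

# A minimal sketch, assuming the two functions above have been sourced.
set.seed(1)
beta.true<-c(3,1.5,0,0,2,0,0,0)
# center Y and standardize X, since the model is fit without an intercept
x<-scale(matrix(rnorm(50*8),50,8))
y<-scale(x%*%beta.true+rnorm(50,sd=3),scale=FALSE)
x.val<-scale(matrix(rnorm(50*8),50,8))
y.val<-scale(x.val%*%beta.true+rnorm(50,sd=3),scale=FALSE)

# fit on training data, then pick lambda on the validation set
fit<-bridge(x, y, lambda=10^seq(2,-2,length=20), q=0.5)
bridge.opt(fit, x.val, y.val)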


##################
# start simulation
# For this simulation, create three data sets 
# consisting of 50/50/200 observations (training/validation/testing).
# Use validation data to select the tuning parameter (lambda).


# q settings
q_seq<-c(0.1,0.5,1,2)
# lambda grid
grid<-10^seq(10,-2,length=100)
# training and testing set
ntrain<-50
ntest<-200
# validation set size
nvalidate<-50

# number of simulation replicates
repeatnum<-100
# initialize MSE
MSE<-matrix(NA, nrow=repeatnum,ncol=length(q_seq))
colnames(MSE)<-q_seq

library(MASS)
# the true coefficient vector
beta<-matrix(c(3,1.5,0,0,2,0,0,0),ncol=1)
# covariance matrix of X
cov<-matrix(NA,8,8)
for (i in 1:8){
  for (j in 1:8){
    cov[i,j]<-0.5^(abs(i-j))
  }
}

for (i in 1:repeatnum){
  # generate X
  x.train<-mvrnorm(n=ntrain,rep(0,8),cov)
  x.test <-mvrnorm(n=ntest,rep(0,8),cov)
  x.validate <-mvrnorm(n=nvalidate,rep(0,8),cov)
  # generate error term
  err.train<-matrix(rnorm(n=ntrain,mean=0,sd=3),ncol=1)
  err.test<-matrix(rnorm(n=ntest,mean=0,sd=3),ncol=1)
  err.validate<-matrix(rnorm(n=nvalidate,mean=0,sd=3),ncol=1)
  # calculate Y
  y.train<-x.train%*%beta+err.train
  y.test<-x.test%*%beta+err.test
  y.validate<-x.validate%*%beta+err.validate
  # center Y and standardize X
  y.train<-scale(y.train,scale=F);y.test<-scale(y.test,scale=F)
  y.validate<-scale(y.validate,scale=F)
  x.train<-scale(x.train);x.test<-scale(x.test)
  x.validate<-scale(x.validate)

  # run for each q
  for(j in 1:length(q_seq)){

    # coefficients on the training set
    bridge.train<-bridge(x.train, y.train, grid, q=q_seq[j], eta=0.001)
    # find the optimal coefficients set that minimize mse of validation set
    coef.opt<-bridge.opt(bridge.train, x.validate, y.validate)
    # MSE on the test set using the optimal model
    MSE[i,j]<-mean((x.test%*%coef.opt-y.test)^2)
  }
}

# mean of MSEs under different models
apply(MSE,2,mean)
# sd of MSEs under different models
apply(MSE,2,sd)
# boxplot of MSEs under different models
boxplot(MSE, ylab="MSE", xlab="q")
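
As a sanity check (not in the original post), note that for q=2 the quadratic approximation is exact, so the LQA result should match the closed-form ridge solution, up to the eta thresholding of small coefficients:

# Hypothetical check on the last simulated training set:
# for q=2 the LQA update is (X'X + lambda*I)^{-1} X'y, i.e., ridge regression.
lam.index<-95                        # an arbitrary position in the lambda grid
ridge.closed<-solve(t(x.train)%*%x.train+grid[lam.index]*diag(8))%*%
  t(x.train)%*%y.train
ridge.lqa<-bridge(x.train, y.train, grid, q=2)[,lam.index]
max(abs(ridge.closed-ridge.lqa))     # should be near zero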

(Figure: boxplots of test-set MSE under each value of q.)
