ESTRELA連載 第83回

注)コードは現在作成中です

今回使うデータ

最尤法:パッケージmlogitを使う(選択肢集合を考慮せず)

  • パッケージとデータの読み込み
    library(mlogit)
    data <- read.table("modechoice.csv", sep=",", header=T)
    TM.data <- mlogit.data(data, varying=c(9:16), shape="wide", 
    choice="choice", alt.levels=c("car", "carpool", "rail", "bus"), alt.var="mode")
  • 多項ロジットモデル
    nml.out <- mlogit(choice~cost+time,data=TM.data)
    summary(nml.out)
  • ネスティッドロジットモデル
    nl.out <- mlogit(choice~cost+time, TM.data, shape="long", method="bfgs",
    nests=list(public=c("rail","bus"),private=c("car","carpool")), alt.levels=c("car","carpool","rail","bus"))
    summary(nl.out)
  • mixed logit model
    mixed.out <- mlogit(choice~cost+time|gend,data=TM.data, rpar=c(cost='n',time='n'), correlation=TRUE, R=10, tol=10)
    summary(mixed.out)

最尤法:対数尤度関数を直接記述

  • 多項選択モデル
    #### モデル構造 ####
    # 選択肢:車・運転(car)、車・相乗り(carpool)、バス(bus)、鉄道(rail)
    # 説明変数
    #  選択肢固有変数:
    #  選択肢共通変数:所要時間、費用
    # *選択肢利用可能性を考慮
    # 効用関数
    # U_car=b1*time+b2*cost
    # U_carpool =b1*time+b2*cost+b3    
    # U_bus=b1*time+b2*cost+b4
    # U_rail=b1*time+b2*cost+b5
    ####
    # データ読み込み
    data <- read.table("modechoice.csv", sep=",", header=T)
    summary(data)
    # サンプル数
    hh <- nrow(data)
    # 選択肢数
    nc <- 4
    # パラメータ数
    np <- 5
    # ロジットモデルの推定
    # 初期値
    b0<-c(rep(0,np))
    # 尤度関数
    fr <- function(x) {   
    b <- b0
    for(i in 1:np){
    b[i] <- x[i]}
    LL=0
    # 効用関数
    # U_car=b1*time+b2*cost
    U1 <- b[1]*data$time.car+b[2]*data$cost.car
    E1 <- exp(U1)*data$avail.car
    # U_carpool =b1*time+b2*cost+b3
    U2 <- b[1]*data$time.carpool+b[2]*data$cost.carpool+b[3]*matrix(1,nrow=hh,ncol=1)
    E2 <- exp(U2)*data$avail.carpool
    # U_bus	=b1*time+b2*cost+b4
    U3 <- b[1]*data$time.bus+b[2]*data$cost.bus+b[4]*matrix(1,nrow=hh,ncol=1)
    E3 <- exp(U3)*data$avail.bus
    # U_rail =b1*time+b2*cost+b5
    U4 <- b[1]*data$time.rail+b[2]*data$cost.rail+b[5]*matrix(1,nrow=hh,ncol=1)
    E4 <- exp(U4)*data$avail.rail
    # 選択確率
    PP1<-E1/(E1+E2+E3+E4)
    PP2<-E2/(E1+E2+E3+E4)
    PP3<-E3/(E1+E2+E3+E4)
    PP4<-E4/(E1+E2+E3+E4)
    # 選択結果のみを尤度関数に反映
    P1 <- (PP1!=0)*PP1+(PP1==0)
    P2 <- (PP2!=0)*PP2+(PP2==0)
    P3 <- (PP3!=0)*PP3+(PP3==0)
    P4 <- (PP4!=0)*PP4+(PP4==0)
    # 実際の選択結果
    C1 <- data$choice == "car"
    C2 <- data$choice == "carpool"
    C3 <- data$choice == "bus"
    C4 <- data$choice == "rail"
    # 対数尤度関数
    LLL<-colSums(C1*log(P1)+C2*log(P2)+C3*log(P3)+C4*log(P4))
    LL <- LL + LLL
    return(LL)
    }
    # 最尤解の計算
    res<-optim(b0,fr, method = "BFGS", hessian = TRUE, control=list(fnscale=-1))
    # 計算結果の表示
    # パラメータ
    b<-res$par 
    print(b)
    # t値
    tval<-b/sqrt(-diag(solve(res$hessian))) 
    print(tval)
    # rho^2
    N1 <- sum(data$choice == "car")
    N2 <- sum(data$choice == "carpool")
    N3 <- sum(data$choice == "bus")
    N4 <- sum(data$choice == "rail")
    L0 <- N1*log(N1/hh)+N2*log(N2/hh)+N3*log(N3/hh)+N4*log(N4/hh)
    LL <- res$value
    rho2 <- (L0-LL)/L0
    rho2bar <- (L0-(LL-length(b)))/L0
    print(rho2)
    print(rho2bar)
  • NLモデル
    # データ読み込み
    data <- read.table("modechoice.csv", sep=",", header=T)
    summary(data)
    # サンプル数
    hh <- nrow(data)
    # 選択肢数
    nc <- 4
    # パラメータ数
    np <- 5
    # ロジットモデルの推定
    # 初期値
    b0<-c(rep(0,np))
    # 尤度関数
    fr <- function(x) {   
    b <- b0
    for(i in 1:np){
    b[i] <- x[i]}
    LL=0
    # 効用関数
    # U_car=b1*time+b2*cost
    U1 <- b[1]*data$time.car+b[2]*data$cost.car
    E1 <- exp(U1)*data$avail.car
    # U_carpool =b1*time+b2*cost                         
    U2 <- b[1]*data$time.carpool+b[2]*data$cost.carpool+b[3]*matrix(1,nrow=hh,ncol=1)
    E2 <- exp(U2)*data$avail.carpool
    # U_bus	=b1*time+b2*cost+b4
    U3 <- b[1]*data$time.bus+b[2]*data$cost.bus
    E3 <- exp(U3)*data$avail.bus
    # U_rail =b1*time+b2*cost+b5
    U4 <- b[1]*data$time.rail+b[2]*data$cost.rail+b[4]*matrix(1,nrow=hh,ncol=1)
    E4 <- exp(U4)*data$avail.rail
    # ログサム変数
    LS1 <- exp(b[5]*log(E1+E2))
    LS2 <- exp(b[5]*log(E3+E4))
    # 選択確率
    PP1<-(E1/(E1+E2))*(LS1/(LS1+LS2))
    PP2<-(E2/(E1+E2))*(LS1/(LS1+LS2))
    PP3<-(E3/(E3+E4))*(LS2/(LS1+LS2))
    PP4<-(E4/(E3+E4))*(LS2/(LS1+LS2))
    # 選択結果のみを尤度関数に反映
    P1 <- (PP1!=0)*PP1+(PP1==0)
    P2 <- (PP2!=0)*PP2+(PP2==0)
    P3 <- (PP3!=0)*PP3+(PP3==0)
    P4 <- (PP4!=0)*PP4+(PP4==0)
    # 実際の選択結果
    C1 <- data$choice == "car"
    C2 <- data$choice == "carpool"
    C3 <- data$choice == "bus"
    C4 <- data$choice == "rail"
    # 対数尤度関数
    LLL<-colSums(C1*log(P1)+C2*log(P2)+C3*log(P3)+C4*log(P4))
    LL <- LL + LLL
    return(LL)
    }
    # 最尤解の計算
    res<-optim(b0,fr, method = "BFGS", hessian = TRUE, control=list(fnscale=-1))
    # 計算結果の表示
    # パラメータ
    b<-res$par 
    print(b)
    # t値
    tval<-b/sqrt(-diag(solve(res$hessian))) 
    print(tval)
    # rho^2
    N1 <- sum(data$choice == "car")
    N2 <- sum(data$choice == "carpool")
    N3 <- sum(data$choice == "bus")
    N4 <- sum(data$choice == "rail")
    L0 <- N1*log(N1/hh)+N2*log(N2/hh)+N3*log(N3/hh)+N4*log(N4/hh)
    LL <- res$value
    rho2 <- (L0-LL)/L0
    rho2bar <- (L0-(LL-length(b)))/L0
    print(rho2)
    print(rho2bar) 

ベイズ法(R&JAGS)

  • 多項選択モデル
  • JAGSファイル(mnl.txt)
    model {
    L <- sum(LL[,])
    for (j in 1:np) {
    beta[j] ~ dnorm(0,0.001)
    }
    for (i in 1:hh) {    
    y[i,1:nc] ~ dmulti( p[i,1:nc] , 1 )                          
    Lkd[i] <- exp(sum(LL[i,1:nc]))
    for (k in 1:nc) {    
    LL[i,k] <- y[i,k]*log(p[i,k])*avail[i,k]
    phi1[i,k] <- exp(beta[1]*time[i,k]+ beta[2]*cost[i,k]+ beta[3]*cons2[i,k]+ beta[4]*cons3[i,k]+ beta[5]*cons4[i,k])
    phi2[i,k] <- phi1[i,k]*avail[i,k]
    p[i,k]  <- phi1[i,k] / sum(phi2[i,])
    }}}
  • Rコード
    library(R2jags)
    data <- read.table("modechoice.csv", sep=",", header=T)
    summary(data)
    # サンプル数
    hh<-nrow(data)
    # 選択肢数
    nc<-4
    # パラメータ数
    np <- 5
    # 被説明変数
    y <- as.matrix(data[,3:6])
    # 説明変数
    time <- as.matrix(data[,13:16])
    cost <- as.matrix(data[,9:12])
    cons2 <- matrix(0,nrow=hh,ncol=nc); cons2[,2] <- 1
    cons3 <- matrix(0,nrow=hh,ncol=nc); cons3[,3] <- 1
    cons4 <- matrix(0,nrow=hh,ncol=nc); cons4[,4] <- 1
    # 選択肢利用可能性
    avail <- as.matrix(data[,18:21])
    # 初期値
    in1 <- list(beta=c(0,0,0,0,0))
    in2 <- list(beta=c(1,1,1,1,1))
    in3 <- list(beta=c(-1,-1,-1,-1,-1))
    beta<-c(0,0,0,0,0)
    Data <- list("hh","nc","np","y", "time", "cost","cons2","cons3","cons4","avail")
    inits <- list(in1, in2, in3)
    parameters <- c("beta")
    model.file <- system.file(package="R2jags", "model", "mnl.txt")
    mnl.jags <- jags(data=Data, inits=inits, n.iter=500, parameters,
    n.burnin=100, n.chains=3, model.file=model.file)
    print(mnl.jags, digits=3)
    plot(mnl.jags)
    traceplot(mnl.jags)
    mnl.fit <- update(mnl.jags)
    print(mnl.fit, digits=3)
  • NLモデル
  • JAGSファイル(nl.txt)
    model { 
    for (i in 1:hh) { 
    for (k in 1:nc) {    
    LL[i,k] <- y[i,k]*log(p[i,k])*avail[i,k]
    }
    }
    L <- sum(LL[,])
    for (i in 1:hh) {
    for (k in 1:nc) {
    phi1[i,k] <- exp(time[i,k]*b[1]+cost[i,k]*b[2]+cons2[i,k]*b[3]+cons4[i,k]*b[4])
    phi2[i,k] <- phi1[i,k]*avail[i,k]
    }
    }
    for (i in 1:hh) {  
    y[i,1:nc] ~ dmulti(p[i,1:nc],1)
    I1[i] <- log(sum(phi1[i,1:2]))
    I2[i] <- log(sum(phi1[i,3:4]))
    p[i,1] <- (exp(alpha*I1[i])/(exp(alpha*I1[i])+exp(alpha*I2[i])))*(phi1[i,1]/sum(phi2[i,1:2]))
    p[i,2] <- (exp(alpha*I1[i])/(exp(alpha*I1[i])+exp(alpha*I2[i])))*(phi1[i,2]/sum(phi2[i,1:2]))
    p[i,3] <- (exp(alpha*I2[i])/(exp(alpha*I1[i])+exp(alpha*I2[i])))*(phi1[i,3]/sum(phi2[i,3:4]))
    p[i,4] <- (exp(alpha*I2[i])/(exp(alpha*I1[i])+exp(alpha*I2[i])))*(phi1[i,4]/sum(phi2[i,3:4]))
    }
    for (j in 1:np) { 
    b[j] ~ dnorm(0,0.001)
    beta[j] <- b[j]*alpha
    }
    alpha ~ dexp(1)
    }
  • Rコード
    data <- read.table("modechoice.csv", sep=",", header=T)
    summary(data)
    # サンプル数
    hh<-nrow(data)
    # 選択肢数
    nc<-4
    # パラメータ数
    np <- 4 
    y <- as.matrix(data[,3:6])
    time <- as.matrix(data[,13:16])
    cost <- as.matrix(data[,9:12])
    cons2 <- matrix(0,nrow=hh,ncol=nc); cons2[,2] <- 1
    cons4 <- matrix(0,nrow=hh,ncol=nc); cons4[,4] <- 1
    avail <- as.matrix(data[,18:21])
    in1 <- list(b=c(0,0,0,0),alhpa=0)
    in2 <- list(b=c(1,1,1,1),alhpa=0.5)
    in3 <- list(b=c(-1,-1,-1,-1),alhpa=0.9)
    b<-c(0,0,0,0)
    alpha<-0
    Data <- list("hh","nc","np","y", "time", "cost","cons2","cons4","avail")
    inits <- list(in1, in2, in3)
    parameters <- c("b","alpha")
    model.file <- system.file(package="R2jags", "model", "nl.txt")
    nl.jags <- jags(data=Data, inits=inits, n.iter=500, parameters,
    n.burnin=100, n.chains=3, model.file=model.file)
    print(nl.jags, digits=3)
    plot(nl.jags)
    traceplot(nl.jags)
    nl.fit <- update(nl.jags)
    print(nl.fit, digits=3)
  • mixed-logit model
  • JAGSファイル(mixed.txt)
    model {
    L <- sum(LL[,])
    for (j in 1:np) {
    beta[j] ~ dnorm(0,0.001)
    }
    for (i in 1:hh) {    
    y[i,1:nc] ~ dmulti( p[i,1:nc] , 1 )    
    InvL[i] <- 1/exp(sum(LL[i,]))
    for (k in 1:nc) {    
    LL[i,k] <- y[i,k]*log(p[i,k])*avail[i,k]
    phi1[i,k] <- exp(beta[1]*time[i,k]+ beta[2]*cost[i,k]+ beta[3]*cons2[i,k]+
    beta[4]*age2[i,k]+beta[5]*cons3[i,k]+beta[6]*age3[i,k]+
    beta[7]*cons4[i,k]+beta[8]*age4[i,k])
    phi2[i,k] <- phi1[i,k]*avail[i,k]
    p[i,k]  <- phi1[i,k] / sum(phi2[i,])
    }}}
  • Rコード
    library(R2jags)
    data <- read.table("modechoice.csv", sep=",", header=T)
    summary(data)
    # サンプル数
    hh<-nrow(data)
    # 選択肢数
    nc<-4
    # パラメータ数
    np <- 8
    # 被説明変数
    y <- as.matrix(data[,3:6])
    # 説明変数
    time <- as.matrix(data[,13:16])
    cost <- as.matrix(data[,9:12])
    cons2 <- matrix(0,nrow=hh,ncol=nc); cons2[,2] <- 1
    cons3 <- matrix(0,nrow=hh,ncol=nc); cons3[,3] <- 1
    cons4 <- matrix(0,nrow=hh,ncol=nc); cons4[,4] <- 1
    gend2 <- cons2 * data$gend
    gend3 <- cons3 * data$gend
    gend4 <- cons4 * data$gend
    # 選択肢利用可能性
    avail <- as.matrix(data[,18:21])
    # 初期値
    in1 <- list(beta=c(0,0,0,0,0,0,0,0))
    in2 <- list(beta=c(1,1,1,1,1,1,1,1))
    in3 <- list(beta=c(-1,-1,-1,-1,-1,-1,-1,-1))
    beta<-c(0,0,0,0,0,0,0,0)
    Data <- list("hh","nc","np","y", "time", "cost","cons2","cons3","cons4",
    "gend2","gend3","gend4","avail")
    inits <- list(in1, in2, in3)
    parameters <- c("beta")
    model.file <- system.file(package="R2jags", "model", "mixed.txt")
    mixed.jags <- jags(data=Data, inits=inits, n.iter=500, parameters,
    n.burnin=100, n.chains=3, model.file=model.file)
    print(mixed.jags, digits=3)
    plot(mixed.jags)
    traceplot(mixed.jags)
    mixed.fit <- update(mixed.jags)
    print(mixed.fit, digits=3) 

ベイズ法(パッケージMCMCpack)

  • MCMCpackのMCMCmnlを使った多項ロジスティック回帰モデル
    library(MCMCpack)
    data <- read.table("modechoice.csv", sep=",", header=T)
    summary(data)
  • 全選択肢利用可能な場合&選択肢共通変数のみの場合
    mode.post1 <- 
    MCMCmnl(data$choice ~
    choicevar(data$time.car, "time", "car") +
    choicevar(data$time.carpool, "time", "carpool")+
    choicevar(data$time.bus, "time", "bus")+ 
    choicevar(data$time.rail, "time", "rail")+
    choicevar(data$cost.car, "cost", "car")+
    choicevar(data$cost.carpool, "cost", "carpool")+
    choicevar(data$cost.bus, "cost", "bus")+
    choicevar(data$cost.rail, "cost", "rail"),
    baseline="rail", mcmc.method="IndMH", B0=0,
    verbose=10000, mcmc=100000, thin=10)
    plot(mode.post1)
    summary(mode.post1)
  • 全選択肢利用可能な場合&選択肢固有変数を含む場合
    mode.post2 <- 
    MCMCmnl(data$choice ~
    choicevar(data$time.car, "time", "car") +
    choicevar(data$time.carpool, "time", "carpool")+
    choicevar(data$time.bus, "time", "bus")+
    choicevar(data$time.rail, "time", "rail")+
    choicevar(data$cost.car, "cost", "car")+
    choicevar(data$cost.carpool, "cost", "carpool")+
    choicevar(data$cost.bus, "cost", "bus")+
    choicevar(data$cost.rail, "cost", "rail")+
    data$age+data$car.own+data$gend,
    baseline="rail", mcmc.method="IndMH", B0=0,
    verbose=10000, mcmc=100000, thin=10)
    plot(mode.post1)
    summary(mode.post2)
  • 選択肢集合を考慮した場合
    # 被説明変数を数値データに変換
    # 選択できない選択肢に-999を与える
    data$mode.choice <- as.numeric(data$choice)
    y <- matrix(0, nrow=nrow(data), ncol=4)
    colnames(y)<-c("car","carpool","bus","rail")
    for(i in 1:hh){
    	if(data$avail.car[i] == 0){y[i,1] <- -999}
    	if(data$choice[i] == "car"){y[i,1] <- 1}
    	if(data$avail.carpool[i] == 0){y[i,2] <- -999}
    	if(data$choice[i] == "carpool"){y[i,2] <- 1}
    	if(data$avail.bus[i] == 0){y[i,3] <- -999}
    	if(data$choice[i] == "bus"){y[i,3] <- 1}
    	if(data$avail.rail[i] == 0){y[i,4] <- -999}
    	if(data$choice[i] == "rail"){y[i,4] <- 1}
    }
    mode.post3 <- 
    MCMCmnl(y ~
    choicevar(data$time.car, "time", "car") +
    choicevar(data$time.carpool, "time", "carpool")+
    choicevar(data$time.bus, "time", "bus")+
    choicevar(data$time.rail, "time", "rail")+
    choicevar(data$cost.car, "cost", "car")+
    choicevar(data$cost.carpool, "cost", "carpool")+
    choicevar(data$cost.bus, "cost", "bus")+
    choicevar(data$cost.rail, "cost", "rail"),
    baseline="rail", mcmc.method="IndMH", B0=0,
    verbose=10000, mcmc=100000, thin=10)
    plot(mode.post3)
    summary(mode.post3)
  • 選択肢固有変数を取り入れる場合
    # car.ownを"car"の選択肢固有変数にする
    # "carpool", "bus", "rail"のcar.ownの値を0にする
    data$car.own0 <- 0
    mode.post4 <- 
    MCMCmnl(y ~
    choicevar(data$time.car, "time", "car") +
    choicevar(data$time.carpool, "time", "carpool")+
    choicevar(data$time.bus, "time", "bus")+
    choicevar(data$time.rail, "time", "rail")+
    choicevar(data$cost.car, "cost", "car")+
    choicevar(data$cost.carpool, "cost", "carpool")+
    choicevar(data$cost.bus, "cost", "bus")+
    choicevar(data$cost.rail, "cost", "rail")+
    choicevar(data$car.own, "carown", "car")+
    choicevar(data$car.own0, "carown", "carpool")+
    choicevar(data$car.own0, "carown", "bus")+
    choicevar(data$car.own0, "carown", "rail"),
    baseline="rail", mcmc.method="IndMH", B0=0,
    verbose=10000, mcmc=100000, thin=10)
    plot(mode.post4)
    summary(mode.post4)

トップ   編集 凍結解除 差分 バックアップ 添付 複製 名前変更 リロード   新規 一覧 単語検索 最終更新   ヘルプ   最終更新のRSS
Last-modified: 2010-04-19 (月) 13:01:36 (3501d)