環境ガバナンスのデータサイエンス/空間モデリング特論　R演習2

演習の概要

空間統計の基礎

注意事項

慶應義塾大学SFCで開講している「環境ガバナンスのデータサイエンス」「空間モデリング特論」の授業履修者向けの演習用ページです。
必ずしも全てのバージョンのRやOSで動作確認を行っていません。この演習用ページを作成している段階では、R3.5.3を使っています。
Rの更新などにより、Rコードなどを予告無しに変更する場合があります。

データ分析の準備

データのダウンロード

演習で用いるデータはここからダウンロードしてください。
ここでは./直下にgisdataフォルダを作成し、setwd("gisdata")とディレクトリを指定している

パッケージのインストール

install.packages()でパッケージをインストールし、library()でパッケージを呼び出す
ここでは以下のパッケージを用います

install.packages("spdep")
install.packages("sp")
install.packages("maptools")
install.packages("classInt")
install.packages("rgdal")
install.packages("sf")
install.packages("spatstat")

library(spdep)
library(sp)
library(maptools)
library(classInt)
library(rgdal)
library(sf)
library(spatstat)

空間データ分析の基礎

空間データの密度

点データの作成

px <- rnorm(500, mean=0.5, sd=0.15)
py <- rnorm(500, mean=0.5, sd=0.15)
# 一様分布の場合（参考）
# px <- runif(500)
# py <- runif(500)
pz <- as.data.frame(rep(1, 500))
colnames(pz) <- c("pz")

点データと点密度の可視化

pnt <- spatstat::ppp(px, py, c(0,1), c(0,1))
plot(pnt, type="p")
plot(density(pnt), 0.1)
contour(density(pnt), add=T)
plot(pnt, type="p", add=T)

## Loading required package: sp

## Loading required package: Matrix

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Loading required package: sf

## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3

## Error: package or namespace load failed for 'spdep' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
##  there is no package called 'expm'

コドラートカウント：その１

pnt_xy <- cbind(px, py)
pnt_sp <- SpatialPoints(data.frame(px, py))
pnt_spdf <- SpatialPointsDataFrame(pnt_sp, pz)
quadratcount(pnt, nx=2, ny=2)
quadratcount(pnt, nx=2, ny=2)/(0.5^2)
plot(pnt, type="p")
plot(quadratcount(pnt, nx=2, ny=2), add=T, col="red")

##          x
## y         [0,0.5) [0.5,1]
##   [0.5,1]     129     140
##   [0,0.5)     121     110

##          x
## y         [0,0.5) [0.5,1]
##   [0.5,1]     516     560
##   [0,0.5)     484     440

コドラートカウント：その２

quadratcount(pnt, nx=4, ny=4)
quadratcount(pnt, nx=4, ny=4)/(0.25^2)
plot(pnt, type="p")
plot(quadratcount(pnt, nx=4, ny=4), add=T, col="red")

##             x
## y            [0,0.25) [0.25,0.5) [0.5,0.75) [0.75,1]
##   [0.75,1]          0          9         12        2
##   [0.5,0.75)       10        110        120        6
##   [0.25,0.5)        9        103         94        6
##   [0,0.25)          1          8         10        0

##             x
## y            [0,0.25) [0.25,0.5) [0.5,0.75) [0.75,1]
##   [0.75,1]          0        144        192       32
##   [0.5,0.75)      160       1760       1920       96
##   [0.25,0.5)      144       1648       1504       96
##   [0,0.25)         16        128        160        0

属性値の基本統計量と標準化

データの読み込み

# データの読み込み
lph <- read.table("lph2010.csv", sep=",", header=TRUE)
x <- lph$Easting
y <- lph$Northing
z <- as.data.frame(cbind(lph$JCODE, lph$lph2010))
colnames(z) <- c("JCODE", "lph2010")
lph_sp <- SpatialPoints(data.frame(x, y))
lph_spdf <- SpatialPointsDataFrame(lph_sp, z)
# 図3.3
yoko_spdf <- lph_spdf[lph_spdf$JCODE>14000 & lph_spdf$JCODE<14200,]
pal0 <- c("grey80","black")
q_lph <- classIntervals(round(yoko_spdf$lph2010/10000,1), n=4, style="fisher")
# plot(q_lph, pal=pal0)
q_lph_Col <- findColours(q_lph,pal0)
plot(yoko_spdf,col=q_lph_Col, pch=20, cex=1.8, axes=TRUE, cex.axis=1.5)
legend("topright",fill=attr(q_lph_Col,"palette"),
legend=names(attr(q_lph_Col,"table")), cex=1.3, bty="n")

## Error: package or namespace load failed for 'spdep' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
##  there is no package called 'expm'

平均・分散・標準偏差の計算

# 標本平均
mean(lph$lph2010)

## [1] 214425.5

# 標本分散
var(lph$lph2010) * (length(lph$lph2010)-1)/length(lph$lph2010)

## [1] 95320500063

# 標本標準偏差
sqrt(var(lph$lph2010) * (length(lph$lph2010)-1)/length(lph$lph2010))

## [1] 308740.2

# 不偏分散
var(lph$lph2010)

## [1] 95332605790

# 不偏標準偏差
sd(lph$lph2010)

## [1] 308759.8

標準化

lph$lph2010.scale <- scale(lph$lph2010)
summary(lph$lph2010.scale)

##        V1         
##  Min.   :-0.6794  
##  1st Qu.:-0.4822  
##  Median :-0.2054  
##  Mean   : 0.0000  
##  3rd Qu.: 0.1379  
##  Max.   :31.6932

# plot(density(lph$lph2010.scale))
hist(lph$lph2010.scale, col="grey", xlim=c(-2,4), ylim=c(0,4000), breaks=48,
main="", xlab="標準化後の地価", ylab="頻度", cex.axis=1.3, cex.lab=1.2)

地域属性の差の比較

データの読み込み

spm.shp <- sf::st_read("tma_spm.shp")

SPMデータ（属性SPM07）を可視化する

spm.shp[, "SPM07"]%>%
  plot()

地域差の検定

# 都県別に公示地価地点データを抽出
spm.shp$KCODE <- floor(spm.shp$ID/1000000)
spm_11 <- spm.shp[spm.shp$KCODE==11,]
spm_12 <- spm.shp[spm.shp$KCODE==12,]
spm_13 <- spm.shp[spm.shp$KCODE==13,]
spm_14 <- spm.shp[spm.shp$KCODE==14,]

hist(spm_13$SPM07, col="grey", xlim=c(0.04, 0.10), ylim=c(0,25),
xlab="SPM(mg/m3)", ylab="頻度", main="", cex.lab=1.2, cex.axis=1.3)

# 図3.6(b)
plot(ecdf(spm_13$SPM07), do.point=FALSE, verticals=TRUE, main="",
lwd=2, cex.axis=1.3, cex.lab=1.2)
z <- seq(0.04, 0.09, by=0.001)
lines(z, pnorm(z, mean=mean(spm_13$SPM07), sd=sd(spm_13$SPM07)), lty=2, lwd=2)

# 平均値の差の検定（等分散）
t.test(spm_13$SPM07, spm_14$SPM07, var.equal=T)

## 
##  Two Sample t-test
## 
## data:  spm_13$SPM07 and spm_14$SPM07
## t = 0.1037, df = 176, p-value = 0.9175
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.002281061  0.002534066
## sample estimates:
##  mean of x  mean of y 
## 0.06347059 0.06334409

# 平均値の差の検定（不等分散）
t.test(spm_13$SPM07, spm_14$SPM07, var.equal=F)

## 
##  Welch Two Sample t-test
## 
## data:  spm_13$SPM07 and spm_14$SPM07
## t = 0.10334, df = 171.3, p-value = 0.9178
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.002289745  0.002542750
## sample estimates:
##  mean of x  mean of y 
## 0.06347059 0.06334409

地域格差

ジニ係数

# install.packages("reldist")
# install.packages("ineq")
# library(reldist)
# library(ineq)
gini(lph$lph2010)
inc <- read.table("inc2006.csv", sep=",", header=TRUE)
library(reldist)
gini(inc$INC2006)
#
library(ineq)
inc.gini <- Lc(inc$INC2006)
inc.gini
plot(inc.gini, cex.axis=1.3, cex.lab=1.2, cex.main=1.3, lwd=3)

## 
## Attaching package: 'ineq'

## The following object is masked from 'package:reldist':
## 
##     entropy

## [1] 0.5038403

## [1] 0.0820301

## $p
##  [1] 0.00000000 0.02127660 0.04255319 0.06382979 0.08510638 0.10638298
##  [7] 0.12765957 0.14893617 0.17021277 0.19148936 0.21276596 0.23404255
## [13] 0.25531915 0.27659574 0.29787234 0.31914894 0.34042553 0.36170213
## [19] 0.38297872 0.40425532 0.42553191 0.44680851 0.46808511 0.48936170
## [25] 0.51063830 0.53191489 0.55319149 0.57446809 0.59574468 0.61702128
## [31] 0.63829787 0.65957447 0.68085106 0.70212766 0.72340426 0.74468085
## [37] 0.76595745 0.78723404 0.80851064 0.82978723 0.85106383 0.87234043
## [43] 0.89361702 0.91489362 0.93617021 0.95744681 0.97872340 1.00000000
## 
## $L
##  [1] 0.00000000 0.01602326 0.03251440 0.04907458 0.06571913 0.08323042
##  [7] 0.10113290 0.11912743 0.13752081 0.15609827 0.17479079 0.19352934
## [13] 0.21242128 0.23138226 0.25036626 0.26944229 0.28933905 0.30939688
## [19] 0.32983823 0.35027958 0.37092803 0.39159182 0.41243969 0.43341029
## [25] 0.45468003 0.47596512 0.49735758 0.51883442 0.54035728 0.56197986
## [31] 0.58378652 0.60574659 0.62785239 0.64996587 0.67237081 0.69508257
## [37] 0.71780200 0.74062881 0.76373942 0.78738696 0.81112654 0.83493515
## [43] 0.85942641 0.88440858 0.91011943 0.93611407 0.96302915 1.00000000
## 
## $L.general
##  [1]    0.00000   44.44681   90.19149  136.12766  182.29787  230.87234
##  [7]  280.53191  330.44681  381.46809  433.00000  484.85106  536.82979
## [13]  589.23404  641.82979  694.48936  747.40426  802.59574  858.23404
## [19]  914.93617  971.63830 1028.91489 1086.23404 1144.06383 1202.23404
## [25] 1261.23404 1320.27660 1379.61702 1439.19149 1498.89362 1558.87234
## [31] 1619.36170 1680.27660 1741.59574 1802.93617 1865.08511 1928.08511
## [37] 1991.10638 2054.42553 2118.53191 2184.12766 2249.97872 2316.02128
## [43] 2383.95745 2453.25532 2524.57447 2596.68085 2671.34043 2773.89362
## 
## attr(,"class")
## [1] "Lc"

地域変動係数

Cv <- sd(lph$lph2010)/mean(lph$lph2010)
Cv

## [1] 1.439939

地域特化係数

まず人口データを読み込み、地域特化係数を計算する。次に、日本の都道府県行政境界データを読み込み、地域特化係数を計算したデータと連結する。

CLpop05 <- readr::read_csv("CLpop05.csv")
CLpop05$pop05CL3 <- (CLpop05$POP3 / CLpop05$POP05) / (sum(CLpop05$POP3) /  sum(CLpop05$POP05))

jpn_pref <- sf::st_read("jpn_pref.shp")

jpn_pref_CL <- dplyr::inner_join(jpn_pref, CLpop05, by=c("PREF_CODE"="PREFCODE" ))

地域特化係数を描画する

ggplot2::ggplot(data = jpn_pref_CL) + 
  geom_sf(aes(fill=pop05CL3)) + 
  scale_fill_distiller(palette="RdYlGn") + 
  theme_bw() +
  labs(title = "Coefficient of regional specialization (aged over 65)")

```

環境ガバナンスのデータサイエンス/空間モデリング特論 R演習2

演習の概要

注意事項

データ分析の準備

データのダウンロード

パッケージのインストール

空間データ分析の基礎

空間データの密度

点データの作成

点データと点密度の可視化

コドラートカウント：その１

コドラートカウント：その２

属性値の基本統計量と標準化

平均・分散・標準偏差の計算

標準化

地域属性の差の比較

地域差の検定

地域格差

ジニ係数

地域変動係数

地域特化係数

環境ガバナンスのデータサイエンス/空間モデリング特論　R演習2