R语言-multiROC package
欢迎来到医科研,这里是白介素2的读书笔记。跟我一起聊临床与科研的故事,以及生物医学数据挖掘、R语言、TCGA与GEO数据挖掘。
找ROC相关的包
# Look up packages related to ROC analysis.
# Switch every locale category to "C" for the rest of the session.
Sys.setlocale("LC_ALL", "C")
## [1] "C"
# library() stops with an error when pkgsearch is not installed; require()
# would only warn and return FALSE, so the failure would surface later at
# pkg_search() with a less helpful message.
library(pkgsearch)
## Warning: package 'pkgsearch' was built under R version 3.6.1
# Query for "ROC", requesting up to 200 results.
rocPkg <- pkg_search(query = "ROC", size = 200)
# Show the full result, then only its first rows.
rocPkg
head(rocPkg)
multiROC包
# Attach multiROC. library() fails immediately when the package is missing,
# whereas require() would only warn and let the script run on until the first
# multiROC call.
library(multiROC)
## Warning: package 'multiROC' was built under R version 3.6.1
# Load the built-in iris data set and preview its first rows.
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Reproducible random split of iris: 60% of the rows go to the training set,
# the remaining rows form the test set.
set.seed(123456)
total_number <- nrow(iris)
train_idx <- sample(total_number, size = round(0.6 * total_number))
# Rows drawn above are the training set.
train_df <- iris[train_idx, ]
# Every row index that was not drawn, in its original order, is the test set.
test_df <- iris[setdiff(seq_len(total_number), train_idx), ]
分训练组与测试组
# Split iris into training (60%) and test (40%) rows with a fixed seed so the
# partition is reproducible.
set.seed(123456)
total_number <- nrow(iris)
n_train <- round(0.6 * total_number)
train_idx <- sample(total_number, size = n_train)
# Training rows are the sampled indices; test rows are all the others.
is_train <- seq_len(total_number) %in% train_idx
train_df <- iris[train_idx, ]
test_df <- iris[!is_train, ]
Random forest法
随机森林模型
# Fit a 100-tree random forest that predicts Species from all other columns
# of the training set, then print the fitted model.
rf_res <- randomForest::randomForest(
  formula = Species ~ .,
  data = train_df,
  ntree = 100
)
print(rf_res)
##
## Call:
## randomForest(formula = Species ~ ., data = train_df, ntree = 100)
## Type of random forest: classification
## Number of trees: 100
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 7.78%
## Confusion matrix:
## setosa versicolor virginica class.error
## setosa 30 0 0 0.0000000
## versicolor 0 29 3 0.0937500
## virginica 0 4 24 0.1428571
# Per-class probabilities for the test rows, as a data frame.
rf_pred <- data.frame(predict(rf_res, newdata = test_df, type = "prob"))
# Tag each column with the model suffix.
# NOTE(review): the separator is a single space, so names look like
# "setosa _pred_RF"; confirm multi_roc() tolerates the space, otherwise
# switch to paste0().
names(rf_pred) <- paste(names(rf_pred), "_pred_RF", sep = " ")
多项logistic回归模型(multinom)
# Fit a multinomial log-linear model of Species on all other columns of the
# training set.
mn_res <- nnet::multinom(formula = Species ~ ., data = train_df)
## # weights: 18 (10 variable)
## initial value 98.875106
## iter 10 value 12.524348
## iter 20 value 5.495452
## iter 30 value 5.352345
## iter 40 value 5.304605
## iter 50 value 5.251277
## iter 60 value 5.250840
## final value 5.250449
## converged
# Per-class probabilities for the test rows, as a data frame.
mn_pred <- data.frame(predict(mn_res, newdata = test_df, type = "prob"))
# Tag each column with the model suffix.
# NOTE(review): the separator is a single space, so names look like
# "setosa _pred_MN"; confirm multi_roc() tolerates the space, otherwise
# switch to paste0().
names(mn_pred) <- paste(names(mn_pred), "_pred_MN", sep = " ")
整合预测值与真实值
将真实类别转换为哑变量:样本属于该类别记为1,否则记为0
# One-hot encode the true class of every test row: 1 when the row belongs to
# the class, 0 otherwise.
# Base R's model.matrix() is used instead of dummies::dummy(): it removes a
# third-party dependency and the "non-list contrasts argument ignored"
# warning that dummy() raised on every run. The `~ 0 + Species` formula drops
# the intercept so that every factor level gets its own indicator column.
true_label <- data.frame(model.matrix(~ 0 + Species, data = test_df))
# model.matrix() names columns "<variable><level>"; keep only the level name.
colnames(true_label) <- sub("^Species", "", colnames(true_label))
# Tag each column as a true label.
# NOTE(review): paste() inserts a space ("setosa _true"), matching the
# prediction column names built elsewhere in this script; confirm multi_roc()
# tolerates the space before changing any of them to paste0().
colnames(true_label) <- paste(colnames(true_label), "_true")
# Combine the true labels and both models' predicted probabilities column-wise.
# check.names = FALSE keeps the column names exactly as they were built.
final_df <- data.frame(true_label, rf_pred, mn_pred, check.names = FALSE)
print(head(final_df, n = 6L))
## setosa _true versicolor _true virginica _true setosa _pred_RF
## 6 1 0 0 1.00
## 7 1 0 0 1.00
## 17 1 0 0 1.00
## 18 1 0 0 1.00
## 19 1 0 0 0.98
## 22 1 0 0 1.00
## versicolor _pred_RF virginica _pred_RF setosa _pred_MN
## 6 0.00 0 1
## 7 0.00 0 1
## 17 0.00 0 1
## 18 0.00 0 1
## 19 0.02 0 1
## 22 0.00 0 1
## versicolor _pred_MN virginica _pred_MN
## 6 6.639968e-11 4.388237e-33
## 7 5.584571e-09 9.246104e-31
## 17 1.097355e-13 3.009961e-37
## 18 1.539783e-10 4.700440e-33
## 19 4.592017e-11 6.295323e-34
## 22 8.766548e-11 8.012250e-33
multiROC
force_diag=TRUE 时,强制 ROC 曲线(TPR、FPR)经过 (0, 0) 和 (1, 1) 两点
# Compute the ROC results from the combined true-label / prediction columns.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, which would silently change this argument.
roc_res <- multi_roc(final_df, force_diag = TRUE)
plot
结果中多出的 Macro、Micro 两组分别是宏平均(macro-average)与微平均(micro-average)的 ROC 及其 AUC 值。绘图时 data 为 plot_roc_df,x 轴为 1-特异度,y 轴为敏感度。
将数据转换为ggplot喜欢的格式
# Reshape the multi_roc() result into a long data frame for plotting; the
# preview below shows its Specificity, Sensitivity, Group, AUC and Method
# columns.
plot_roc_df <- plot_roc_data(roc_res)
print(head(plot_roc_df, n = 6L))
## Specificity Sensitivity Group AUC Method
## 1 1 0.00 setosa 1 RF
## 2 1 0.05 setosa 1 RF
## 3 1 0.10 setosa 1 RF
## 4 1 0.15 setosa 1 RF
## 5 1 0.20 setosa 1 RF
## 6 1 0.25 setosa 1 RF
# Draw the ROC curves.
# library() fails immediately when ggplot2 is missing; require() would only
# warn and the ggplot() call below would then fail with "could not find
# function".
library(ggplot2)
ggplot(plot_roc_df, aes(x = 1 - Specificity, y = Sensitivity)) +
  # One curve per Group (colour) and Method (line type).
  geom_path(aes(color = Group, linetype = Method), size = 1.5) +
  # Reference diagonal from (0, 0) to (1, 1). annotate() draws it once;
  # geom_segment(aes(...)) with constant values would draw one identical
  # segment for every row of plot_roc_df.
  annotate("segment", x = 0, y = 0, xend = 1, yend = 1,
           colour = "grey", linetype = "dotdash") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    # Anchor the legend by its bottom-right corner near the bottom-right of
    # the panel.
    legend.justification = c(1, 0),
    legend.position = c(.95, .05),
    legend.title = element_blank(),
    legend.background = element_rect(fill = NULL, size = 0.5,
                                     linetype = "solid", colour = "black")
  )
总结
multiROC包适用于比较不同模型在诊断任务中的ROC表现
multiROC可计算多分类诊断模型的ROC曲线与AUC