Skip to content

Commit 801c8f8

Browse files
authored
Merge pull request #1 from mini-pw/main
update
2 parents a9b8459 + 02d8aa7 commit 801c8f8

File tree

4 files changed

+198
-17
lines changed

4 files changed

+198
-17
lines changed

Materialy/Lab2/basic_modeling.R

+8-7
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,13 @@ model <- lm(y~., df)
1414
model
1515
summary(model)
1616

17-
new <- data.frame(x1 =1, x2 = 2, x3 = 3)
18-
17+
new <- data.frame(x1 = 1, x2 = 2, x3 = 3)
1918

2019
predict(model, new)
2120

22-
pred <- predict(model)
21+
pred <- predict(model, df)
2322

24-
(MSE <- sum((y - pred)^2)/n)
23+
(MSE <- mean((y - pred)^2))
2524

2625
library(rpart)
2726

@@ -31,7 +30,7 @@ summary(tree)
3130
library(rpart.plot)
3231
rpart.plot(tree)
3332

34-
pred_tree <- predict(tree)
33+
pred_tree <- predict(tree, df)
3534
(MSE <- sum((y - pred_tree)^2)/n)
3635

3736

@@ -69,8 +68,10 @@ class_glm <- ifelse(pred_glm > 0.5, "Yes", "No")
6968

7069
Acc_glm = sum(class_glm == Pima.te$type) / nrow(Pima.te)
7170
Acc_glm
72-
71+
roc_obj_glm <- roc(Pima.te$type, pred_glm)
72+
plot(roc_obj_glm)
73+
auc(roc_obj_glm)
7374

7475
library(e1071)
7576
model_svm <- svm(type~., Pima.te)
76-
pred_svm <- predict(model_svm)
77+
pred_svm <- predict(model_svm)

Materialy/Lab2/less_basic_modeling.R

+25-10
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
library(OpenML)
55
library(mlr)
6+
#library(mlr3)
7+
#library(mlr3learners)
8+
#library(mlr3measures)
69

710
set.seed(1)
811

@@ -27,7 +30,7 @@ getHyperPars(classif_lrn)
2730

2831
# audyt modelu
2932
cv <- makeResampleDesc("CV", iters = 7)
30-
r <- resample(classif_lrn, classif_task, cv, measures = list(auc), models = TRUE)
33+
r <- resample(classif_lrn, classif_task, cv, measures = list(auc, mmce), models = TRUE)
3134
r$models
3235
AUC <- r$aggr
3336
AUC
@@ -38,21 +41,26 @@ listMeasures(obj = "classif")
3841

3942
### Zadanie 1
4043

41-
# Uzywajac pakietu OpenML zaladuj dowolny zbior danych (zalecany projektowy jeżeli jest dostepny) oraz stworz dowolny model nastepnie poddajac go audytowi
44+
# Uzywajac pakietu OpenML zaladuj dowolny zbior danych (zalecany projektowy jeżeli jest dostepny) oraz
45+
# stworz audyt dowolnego modelu
4246
# Protip: Skopiuj kod powyzej i go przerob
4347

48+
# Krzywa roc z modelu
49+
model <- r$models[[7]]
50+
pred <- predict(model, newdata = monks)
51+
pred <- pred$data$prob.1
52+
roc_obj <- roc(monks$class, pred)
53+
plot(roc_obj)
4454

45-
46-
# BARDZO WAZNA UWAGA O KOLEJNOSCI ARGUMENTOW
47-
48-
model <- r$models[[1]]
55+
# Macierz pomylek z modelu
56+
model <- r$models[[7]]
57+
mlr::calculateConfusionMatrix(predict(model, newdata = monks))
4958

5059
### Reprezentacja poszczególnych drzew
5160
ranger::treeInfo(model$learner.model, 1)
5261

5362

54-
p <- predict(model, newdata = monks)
55-
p
63+
# Podzial testowy/treningowy
5664

5765
m <- sample(1:nrow(monks), 0.7*nrow(monks))
5866
monks_train <- monks[m,]
@@ -61,6 +69,13 @@ monks_test <- monks[-m,]
6169
classif_task <- makeClassifTask(id = "lvr", data = monks_train, target = "class")
6270
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 500, mtry = 3), predict.type = "prob")
6371
model <- train(classif_lrn, classif_task)
64-
predict(model, newdata = monks_train)
72+
### Zadanie 2 Stworz model liniowy korzystajac z funkcji glm dla danych monks. Porównaj AUC obu modeli na zbiorze testowym
73+
74+
model_linear <- glm(class~., monks_train, family = "binomial")
75+
roc_obj_glm <- roc(monks_test$class, predict(model_linear, monks_test, type = "response"))
6576

66-
### Zadanie 2 Stworz model liniowy korzystajac z funkcji glm dla danych monks. Porównaj MSE obu modeli
77+
pred <- predict(model, newdata = monks_test)$data
78+
pred <- pred$prob.0
79+
roc_obj_ranger <- roc(monks_test$class, pred)
80+
roc_obj_ranger$auc
81+
c("glm" = roc_obj_glm$auc, "ranger" = roc_obj_ranger$auc)
+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#install.packages("OpenML")
2+
#install.packages("mlr")
3+
4+
library(OpenML)
5+
library(mlr)
6+
library(pROC)
7+
#library(mlr3)
8+
#library(mlr3learners)
9+
#library(mlr3measures)
10+
11+
#set.seed(1)
12+
13+
### MONKS
14+
15+
# pobranie danych
16+
monks <- getOMLDataSet(data.id = 334L)
17+
monks <- monks$data
18+
head(monks)
19+
20+
# Podzial testowy/treningowy
21+
22+
m <- sample(1:nrow(monks), 0.7*nrow(monks))
23+
monks_train <- monks[m,]
24+
monks_test <- monks[-m,]
25+
26+
classif_task <- makeClassifTask(id = "lvr", data = monks_train, target = "class")
27+
28+
# listowanie learnerow ze wsparciem dla prawdopodobieństw
29+
listLearners(properties = "prob")$class
30+
# listowanie zbioru hiperparametrów
31+
getLearnerParamSet("classif.ranger")
32+
33+
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 500, mtry = 3), predict.type = "prob")
34+
35+
getParamSet(classif_lrn)
36+
helpLearnerParam(classif_lrn)
37+
getHyperPars(classif_lrn)
38+
39+
40+
model <- train(classif_lrn, classif_task)
41+
42+
pred_train <- predict(model, newdata = monks_train)$data$prob.0
43+
pred_test <- predict(model, newdata = monks_test)$data$prob.0
44+
roc(monks_train$class, pred_train)
45+
roc(monks_test$class, pred_test)
46+
47+
48+
### TITANIC
49+
data(titanic_imputed, package = "DALEX")
50+
titanic_imputed$survived <- as.factor(titanic_imputed$survived)
51+
m <- sample(1:nrow(titanic_imputed), 0.7*nrow(titanic_imputed))
52+
titanic_train <- titanic_imputed[m,]
53+
titanic_test <- titanic_imputed[-m,]
54+
55+
classif_task <- makeClassifTask(id = "lvr", data = titanic_train, target = "survived")
56+
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 2000, mtry = 3), predict.type = "prob")
57+
model <- train(classif_lrn, classif_task)
58+
59+
pred_train <- predict(model, newdata = titanic_train)$data$prob.0
60+
pred_test <- predict(model, newdata = titanic_test)$data$prob.0
61+
roc(titanic_train$survived, pred_train)
62+
roc(titanic_test$survived, pred_test)
63+
64+
65+
### Walidacja krzyżowa
66+
67+
classif_task <- makeClassifTask(id = "lvr", data = titanic_train, target = "survived")
68+
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 60, mtry = 3), predict.type = "prob")
69+
cv <- makeResampleDesc("CV", iters = 7)
70+
r <- resample(classif_lrn, classif_task, cv, measures = mlr::auc, models = TRUE)
71+
r$models
72+
AUC <- r$aggr
73+
AUC
74+
75+
76+
77+
### Zadanie 2 Stworz model liniowy korzystajac z funkcji glm dla danych monks. Porównaj AUC obu modeli na zbiorze testowym
78+
model_linear <- glm(class~., monks_train, family = "binomial")
79+
roc_obj_glm <- roc(monks_test$class, predict(model_linear, monks_test, type = "response"))
80+
81+
pred <- predict(model, newdata = monks_test)$data
82+
pred <- pred$prob.0
83+
roc_obj_ranger <- roc(monks_test$class, pred)
84+
roc_obj_ranger$auc
85+
c("glm" = roc_obj_glm$auc, "ranger" = roc_obj_ranger$auc)

Materialy/Lab3/BD&Shap.R

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
### Model ###
2+
3+
data(titanic_imputed, package = "DALEX")
4+
5+
model <- ranger::ranger(survived~., data = titanic_imputed, classification = TRUE, probability = TRUE)
6+
7+
# Podejrzyjmy parametry
8+
model
9+
10+
# Podejrzyjmy predykcje
11+
predict(model, head(titanic_imputed))$predictions
12+
13+
14+
###DALEX###
15+
16+
# Explainer jest to obiekt bedacy portem do wszystkich funkcjonalnosci. Opakowuje on model w jednolita strukture ktora potem jest wykorzystywana do tego
17+
# aby wyliczyc wszystkie wyjasnienia. Kluczowe elementy to model, data, y oraz predict_function. Domyslnie DALEX wspiera duzo roznych predict function.
18+
19+
library(DALEX)
20+
library(DALEXtra)
21+
22+
explainer <- explain(model = model,
23+
data = titanic_imputed,
24+
y = titanic_imputed$survived) # WAZNE: to musi byc wartosc numeryczna dla binarnej klasyfikacji
25+
26+
# Jezeli verbose = TRUE to otrzymamy podsumowanie naszego modelu
27+
# Preparation of a new explainer is initiated
28+
# -> model label : ranger ( default )
29+
# -> data : 2207 rows 8 cols
30+
# -> target variable : 2207 values
31+
# -> predict function : yhat.ranger will be used ( default )
32+
# -> predicted values : No value for predict function target column. ( default )
33+
# -> model_info : package ranger , ver. 0.12.1 , task classification ( default )
34+
# -> predicted values : numerical, min = 0.01430847 , mean = 0.3222976 , max = 0.9884335
35+
# -> residual function : difference between y and yhat ( default )
36+
# -> residuals : numerical, min = -0.7825395 , mean = -0.0001408668 , max = 0.8849883
37+
# A new explainer has been created!
38+
39+
explainer$predict_function
40+
?yhat
41+
methods("yhat")
42+
43+
44+
library(mlr)
45+
titanic_imputed_fct <- titanic_imputed
46+
titanic_imputed_fct$survived <- as.factor(titanic_imputed_fct$survived)
47+
48+
classif_task <- makeClassifTask(data = titanic_imputed_fct, target = "survived")
49+
classif_lrn <- makeLearner("classif.svm", predict.type = "prob")
50+
model_mlr <- train(classif_lrn, classif_task)
51+
52+
explainer_mlr <- explain(model = model_mlr,
53+
data = titanic_imputed_fct,
54+
y = as.numeric(as.character(titanic_imputed_fct$survived)))
55+
56+
# Widzimy, że mlr tez jest domyslnie wspierany
57+
58+
### Break Down ###
59+
60+
pp_ranger_bd_1 <- predict_parts(explainer, new_observation = titanic_imputed[1,])
61+
plot(pp_ranger_bd_1)
62+
63+
pp_ranger_bd_2 <- predict_parts(explainer, new_observation = titanic_imputed[13,])
64+
plot(pp_ranger_bd_2)
65+
66+
67+
### SHAP ###
68+
69+
pp_ranger_shap_1 <- predict_parts(explainer, new_observation = titanic_imputed[1,], type = "shap", B = 10)
70+
plot(pp_ranger_shap_1)
71+
72+
pp_ranger_shap_2 <- predict_parts(explainer, new_observation = titanic_imputed[13,], type = "shap", B = 10)
73+
plot(pp_ranger_shap_2)
74+
75+
76+
77+
# Zadanko
78+
79+
# Wez dowolny zbior, stworz dowolny model oraz wygeneruj dla niego wyjasnienie BreakDown
80+

0 commit comments

Comments
 (0)