Skip to content

Commit 801c8f8

Browse files
authored
Merge pull request #1 from mini-pw/main
update
2 parents a9b8459 + 02d8aa7 commit 801c8f8

File tree

4 files changed

+198
-17
lines changed

4 files changed

+198
-17
lines changed

Materialy/Lab2/basic_modeling.R

+8-7
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,13 @@ model <- lm(y~., df)
1414
model
1515
summary(model)
1616

17-
new <- data.frame(x1 =1, x2 = 2, x3 = 3)
18-
17+
new <- data.frame(x1 = 1, x2 = 2, x3 = 3)
1918

2019
predict(model, new)
2120

22-
pred <- predict(model)
21+
pred <- predict(model, df)
2322

24-
(MSE <- sum((y - pred)^2)/n)
23+
(MSE <- mean((y - pred)^2))
2524

2625
library(rpart)
2726

@@ -31,7 +30,7 @@ summary(tree)
3130
library(rpart.plot)
3231
rpart.plot(tree)
3332

34-
pred_tree <- predict(tree)
33+
pred_tree <- predict(tree, df)
3534
(MSE <- sum((y - pred_tree)^2)/n)
3635

3736

@@ -69,8 +68,10 @@ class_glm <- ifelse(pred_glm > 0.5, "Yes", "No")
6968

7069
Acc_glm = sum(class_glm == Pima.te$type) / nrow(Pima.te)
7170
Acc_glm
72-
71+
roc_obj_glm <- roc(Pima.te$type, pred_glm)
72+
plot(roc_obj_glm)
73+
auc(roc_obj_glm)
7374

7475
library(e1071)
7576
model_svm <- svm(type~., Pima.te)
76-
pred_svm <- predict(model_svm)
77+
pred_svm <- predict(model_svm)

Materialy/Lab2/less_basic_modeling.R

+25-10
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
library(OpenML)
55
library(mlr)
6+
#library(mlr3)
7+
#library(mlr3learners)
8+
#library(mlr3measures)
69

710
set.seed(1)
811

@@ -27,7 +30,7 @@ getHyperPars(classif_lrn)
2730

2831
# audyt modelu
2932
cv <- makeResampleDesc("CV", iters = 7)
30-
r <- resample(classif_lrn, classif_task, cv, measures = list(auc), models = TRUE)
33+
r <- resample(classif_lrn, classif_task, cv, measures = list(auc, mmce), models = TRUE)
3134
r$models
3235
AUC <- r$aggr
3336
AUC
@@ -38,21 +41,26 @@ listMeasures(obj = "classif")
3841

3942
### Zadanie 1
4043

41-
# Uzywajac pakietu OpenML zaladuj dowolny zbior danych (zalecany projektowy jeżeli jest dostepny) oraz stworz dowolny model nastepnie poddajac go audytowi
44+
# Uzywajac pakietu OpenML zaladuj dowolny zbior danych (zalecany projektowy jeżeli jest dostepny) oraz
45+
# stworz audyt dowolnego modelu
4246
# Protip: Skopiuj kod powyzej i go przerob
4347

48+
# Krzywa roc z modelu
49+
model <- r$models[[7]]
50+
pred <- predict(model, newdata = monks)
51+
pred <- pred$data$prob.1
52+
roc_obj <- roc(monks$class, pred)
53+
plot(roc_obj)
4454

45-
46-
# BARDZO WAZNA UWAGA O KOLEJNOSCI ARGUMENTOW
47-
48-
model <- r$models[[1]]
55+
# Macierz pomylek z modelu
56+
model <- r$models[[7]]
57+
mlr::calculateConfusionMatrix(predict(model, newdata = monks))
4958

5059
### Reprezentacja poszczególnych drzew
5160
ranger::treeInfo(model$learner.model, 1)
5261

5362

54-
p <- predict(model, newdata = monks)
55-
p
63+
# Podzial testowy/treningowy
5664

5765
m <- sample(1:nrow(monks), 0.7*nrow(monks))
5866
monks_train <- monks[m,]
@@ -61,6 +69,13 @@ monks_test <- monks[-m,]
6169
classif_task <- makeClassifTask(id = "lvr", data = monks_train, target = "class")
6270
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 500, mtry = 3), predict.type = "prob")
6371
model <- train(classif_lrn, classif_task)
64-
predict(model, newdata = monks_train)
72+
### Zadanie 2 Stworz model liniowy korzystajac z funkcji glm dla danych monks. Porównaj AUC obu modeli na zbiorze testowym
73+
74+
model_linear <- glm(class~., monks_train, family = "binomial")
75+
roc_obj_glm <- roc(monks_test$class, predict(model_linear, monks_test, type = "response"))
6576

66-
### Zadanie 2 Stworz model liniowy korzystajac z funkcji glm dla danych monks. Porównaj MSE obu modeli
77+
pred <- predict(model, newdata = monks_test)$data
78+
pred <- pred$prob.0
79+
roc_obj_ranger <- roc(monks_test$class, pred)
80+
roc_obj_ranger$auc
81+
c("glm" = roc_obj_glm$auc, "ranger" = roc_obj_ranger$auc)
+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#install.packages("OpenML")
2+
#install.packages("mlr")
3+
4+
library(OpenML)
5+
library(mlr)
6+
library(pROC)
7+
#library(mlr3)
8+
#library(mlr3learners)
9+
#library(mlr3measures)
10+
11+
#set.seed(1)
12+
13+
### MONKS
14+
15+
# pobranie danych
16+
monks <- getOMLDataSet(data.id = 334L)
17+
monks <- monks$data
18+
head(monks)
19+
20+
# Podzial testowy/treningowy
21+
22+
m <- sample(1:nrow(monks), 0.7*nrow(monks))
23+
monks_train <- monks[m,]
24+
monks_test <- monks[-m,]
25+
26+
classif_task <- makeClassifTask(id = "lvr", data = monks_train, target = "class")
27+
28+
# listowanie learnerow ze wsparciem dla prawdopodobieństw
29+
listLearners(properties = "prob")$class
30+
# listowanie zbioru hiperparametrów
31+
getLearnerParamSet("classif.ranger")
32+
33+
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 500, mtry = 3), predict.type = "prob")
34+
35+
getParamSet(classif_lrn)
36+
helpLearnerParam(classif_lrn)
37+
getHyperPars(classif_lrn)
38+
39+
40+
model <- train(classif_lrn, classif_task)
41+
42+
pred_train <- predict(model, newdata = monks_train)$data$prob.0
43+
pred_test <- predict(model, newdata = monks_test)$data$prob.0
44+
roc(monks_train$class, pred_train)
45+
roc(monks_test$class, pred_test)
46+
47+
48+
### TITANIC
49+
data(titanic_imputed, package = "DALEX")
50+
titanic_imputed$survived <- as.factor(titanic_imputed$survived)
51+
m <- sample(1:nrow(titanic_imputed), 0.7*nrow(titanic_imputed))
52+
titanic_train <- titanic_imputed[m,]
53+
titanic_test <- titanic_imputed[-m,]
54+
55+
classif_task <- makeClassifTask(id = "lvr", data = titanic_train, target = "survived")
56+
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 2000, mtry = 3), predict.type = "prob")
57+
model <- train(classif_lrn, classif_task)
58+
59+
pred_train <- predict(model, newdata = titanic_train)$data$prob.0
60+
pred_test <- predict(model, newdata = titanic_test)$data$prob.0
61+
roc(titanic_train$survived, pred_train)
62+
roc(titanic_test$survived, pred_test)
63+
64+
65+
### Walidacja krzyżowa
66+
67+
classif_task <- makeClassifTask(id = "lvr", data = titanic_train, target = "survived")
68+
classif_lrn <- makeLearner("classif.ranger", par.vals = list(num.trees = 60, mtry = 3), predict.type = "prob")
69+
cv <- makeResampleDesc("CV", iters = 7)
70+
r <- resample(classif_lrn, classif_task, cv, measures = mlr::auc, models = TRUE)
71+
r$models
72+
AUC <- r$aggr
73+
AUC
74+
75+
76+
77+
### Zadanie 2 Stworz model liniowy korzystajac z funkcji glm dla danych monks. Porównaj AUC obu modeli na zbiorze testowym
78+
model_linear <- glm(class~., monks_train, family = "binomial")
79+
roc_obj_glm <- roc(monks_test$class, predict(model_linear, monks_test, type = "response"))
80+
81+
pred <- predict(model, newdata = monks_test)$data
82+
pred <- pred$prob.0
83+
roc_obj_ranger <- roc(monks_test$class, pred)
84+
roc_obj_ranger$auc
85+
c("glm" = roc_obj_glm$auc, "ranger" = roc_obj_ranger$auc)

Materialy/Lab3/BD&Shap.R

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
### Model ###
2+
3+
data(titanic_imputed, package = "DALEX")
4+
5+
model <- ranger::ranger(survived~., data = titanic_imputed, classification = TRUE, probability = TRUE)
6+
7+
# Podejrzyjmy parametry
8+
model
9+
10+
# Podejrzyjmy predykcje
11+
predict(model, head(titanic_imputed))$predictions
12+
13+
14+
###DALEX###
15+
16+
# Explainer jest to obiekt bedacy portem do wszystkich funkcjonalnosci. Opakowuje on model w jednolita strukture ktora potem jest wykorzystywana do tego
17+
# aby wyliczyc wszystkie wyjasnienia. Kluczowe elementy to model, data, y oraz predict_function. Domyslnie DALEX wspiera duzo roznych predict function.
18+
19+
library(DALEX)
20+
library(DALEXtra)
21+
22+
explainer <- explain(model = model,
23+
data = titanic_imputed,
24+
y = titanic_imputed$survived) # WAZNE: to musi byc wartosc numeryczna dla binarnej klasyfikacji
25+
26+
# Jezeli verbose = TRUE to otrzymamy podsumowanie naszego modelu
27+
# Preparation of a new explainer is initiated
28+
# -> model label : ranger ( default )
29+
# -> data : 2207 rows 8 cols
30+
# -> target variable : 2207 values
31+
# -> predict function : yhat.ranger will be used ( default )
32+
# -> predicted values : No value for predict function target column. ( default )
33+
# -> model_info : package ranger , ver. 0.12.1 , task classification ( default )
34+
# -> predicted values : numerical, min = 0.01430847 , mean = 0.3222976 , max = 0.9884335
35+
# -> residual function : difference between y and yhat ( default )
36+
# -> residuals : numerical, min = -0.7825395 , mean = -0.0001408668 , max = 0.8849883
37+
# A new explainer has been created!
38+
39+
explainer$predict_function
40+
?yhat
41+
methods("yhat")
42+
43+
44+
library(mlr)
45+
titanic_imputed_fct <- titanic_imputed
46+
titanic_imputed_fct$survived <- as.factor(titanic_imputed_fct$survived)
47+
48+
classif_task <- makeClassifTask(data = titanic_imputed_fct, target = "survived")
49+
classif_lrn <- makeLearner("classif.svm", predict.type = "prob")
50+
model_mlr <- train(classif_lrn, classif_task)
51+
52+
explainer_mlr <- explain(model = model_mlr,
53+
data = titanic_imputed_fct,
54+
y = as.numeric(as.character(titanic_imputed_fct$survived)))
55+
56+
# Widzimy, że mlr tez jest domyslnie wspierany
57+
58+
### Break Down ###
59+
60+
pp_ranger_bd_1 <- predict_parts(explainer, new_observation = titanic_imputed[1,])
61+
plot(pp_ranger_bd_1)
62+
63+
pp_ranger_bd_2 <- predict_parts(explainer, new_observation = titanic_imputed[13,])
64+
plot(pp_ranger_bd_2)
65+
66+
67+
### SHAP ###
68+
69+
pp_ranger_shap_1 <- predict_parts(explainer, new_observation = titanic_imputed[1,], type = "shap", B = 10)
70+
plot(pp_ranger_shap_1)
71+
72+
pp_ranger_shap_2 <- predict_parts(explainer, new_observation = titanic_imputed[13,], type = "shap", B = 10)
73+
plot(pp_ranger_shap_2)
74+
75+
76+
77+
# Zadanko
78+
79+
# Wez dowolny zbior, stworz dowolny model oraz wygeneruj dla niego wyjasnienie BreakDown
80+

0 commit comments

Comments
 (0)