From a18d2993d70b22026694ce05374640026b0753a9 Mon Sep 17 00:00:00 2001 From: Maria Polewczyk Date: Wed, 23 Jan 2019 10:31:30 +0100 Subject: [PATCH] Update main.R --- main.R | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/main.R b/main.R index 63aef4a..77b2b8e 100644 --- a/main.R +++ b/main.R @@ -2,7 +2,6 @@ library(data.table) library(tidytext) library(magrittr) library(dplyr) -library(fifer) library(keras) library(readr) @@ -26,7 +25,6 @@ get_data <- function(input, num_words, max_len, is_train) { } train <- fread('../input/train.csv', data.table = FALSE) -#train <- stratified(train, "target", 0.2) test <- fread('../input/test.csv', data.table = FALSE) data = get_data(train, max_words, maxlen, TRUE) @@ -36,13 +34,11 @@ embeddings <- readLines('../input/embeddings/wiki-news-300d-1M/wiki-news-300d-1M embeddings_index = new.env(hash = TRUE, parent = emptyenv()) embeddings <- embeddings[2:length(embeddings)] -pb <- txtProgressBar(min = 0, max = length(embeddings), style = 3) for (i in 1:length(embeddings)){ embedding <- embeddings[[i]] values <- strsplit(embedding, " ")[[1]] word <- values[[1]] embeddings_index[[word]] = as.double(values[-1]) - setTxtProgressBar(pb, i) } word_vectors = array(0, c(max_words, 300)) @@ -65,10 +61,9 @@ cat("Not found:", not_found) labels = train$target indices = sample(1:nrow(data)) -training_indices = indices[1:nrow(data)] -x_train = data[training_indices,] -y_train = labels[training_indices] +x_train = data[indices,] +y_train = labels[indices] input <- layer_input( shape = list(NULL), @@ -78,7 +73,7 @@ input <- layer_input( predictions <- input %>% layer_embedding(input_dim = max_words, output_dim = 300, name = "embedding") %>% - layer_lstm(units = maxlen,dropout = 0.25, recurrent_dropout = 0.25, return_sequences = FALSE, name = "lstm") %>% + layer_lstm(units = 64, dropout = 0.25, recurrent_dropout = 0.25, return_sequences = FALSE, name = "lstm") %>% layer_dense(units = 128, activation = "relu", name = "dense") %>% layer_dense(units = 1, activation = "sigmoid", name = "predictions") @@ -105,7 +100,7 @@ history <- model %>% fit( batch_size = 1024, epochs = 30, validation_split=0.1, - verbose = 1 + verbose = 2 ) print(history) @@ -116,7 +111,8 @@ Sys.time() predictions <- predict(model, test_data) predictions <- ifelse(predictions >= 0.5, 1, 0) -submission = data.frame(cbind(test$qid, predictions)) -names(submission) = c("qid", "prediction") +result = data.frame(cbind(test$qid, predictions)) +names(result) = c("qid", "prediction") +write_csv(result, "submission.csv") Sys.time()