diff --git a/paper/Muhamad_Sulhan_-_Detecting_vandalism_on_English_Wikipedia_using_CRF.tex b/paper/23513014_Muhamad_Sulhan_-_Paper.tex similarity index 100% rename from paper/Muhamad_Sulhan_-_Detecting_vandalism_on_English_Wikipedia_using_CRF.tex rename to paper/23513014_Muhamad_Sulhan_-_Paper.tex diff --git a/paper/diagram_process.tex b/paper/diagram_process.tex index ab2b258..990d530 100644 --- a/paper/diagram_process.tex +++ b/paper/diagram_process.tex @@ -1,7 +1,7 @@ \begin{figure}[tp!] \centering \resizebox{0.3\textwidth}{!} { -\tikzsetnextfilename{diagramprocess} +\mytikzinput{diagramprocess} \begin{tikzpicture}[ framed, nodes = { diff --git a/paper/introduction.tex b/paper/introduction.tex index 593b360..a857fe8 100644 --- a/paper/introduction.tex +++ b/paper/introduction.tex @@ -24,17 +24,15 @@ assume that class distribution is balanced, while in real world cases this rarely happened. -RF has the disadvantage in their the computation time especially when training -the classification model. -For a large dataset with more than 10,000 samples (like the PAN-WVC-10 cases) -this could lead to hours of training time. -One of the solution is by using Cascaded Random Forest (CRF) framework proposed -by Bauman et al. -\cite{baumann2013cascaded}. -Their paper state that CRF give a fast training model time and increased -performance compared to RF. +Random Forest (RF) has the disadvantage of long computation time, +especially when training the classification model. For a large dataset with +more than 10,000 samples (like the PAN-WVC-10 cases) this could lead to hours +of training time. One solution is to use the Cascaded Random Forest (CRF) +framework proposed by Baumann et al.~\cite{baumann2013cascaded}. Their paper +states that CRF gives faster model training and increased performance +compared to RF. 
-This paper attempt to overcome the dataset imbalance problem on PAN-WVC-10 by +This paper attempts to overcome the dataset imbalance problem on PAN-WVC-10 by applying resample and classifier technique that has never been used before on the dataset. The PAN-WVC-10 dataset is resampled using Local Neighborhood SMOTE (LNSMOTE) diff --git a/paper/latexmk.sh b/paper/latexmk.sh index 657936d..6a86dfa 100755 --- a/paper/latexmk.sh +++ b/paper/latexmk.sh @@ -1,3 +1,3 @@ #!/bin/sh -latexmk -pvc -view=pdf Muhamad_Sulhan_-_Detecting_vandalism_on_English_Wikipedia_using_CRF.tex +latexmk -pvc -view=pdf 23513014_Muhamad_Sulhan_-_Paper.tex diff --git a/paper/preamble.tex b/paper/preamble.tex index 0cd818a..55edf9b 100644 --- a/paper/preamble.tex +++ b/paper/preamble.tex @@ -47,6 +47,13 @@ \usetikzlibrary{backgrounds, shapes.geometric, positioning, patterns, external} \tikzexternalize +%% Make TikZ write externalized PDF files to the tmp/ directory +\makeatletter + \newcommand{\mytikzinput}[1]{% + \tikzsetnextfilename{tmp/#1}% + } +\makeatother + %% Pgfplots \usepackage{pgfplots}