-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanscombequartet.R
37 lines (26 loc) · 1.14 KB
/
anscombequartet.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Anscombe's quartet
# is a collection of four datasets that have nearly identical summary
# statistics. Hence the data summary alone can be misleading,
# so data visualisation becomes essential and helps us
# discover things in our data that would otherwise remain hidden.
# Let's load the package that provides the dataset.
# Setup ----
# Install Tmisc only when it is missing, so re-running the script does not
# reinstall the package (or fail when no CRAN mirror is configured).
if (!requireNamespace("Tmisc", quietly = TRUE)) {
  install.packages("Tmisc")
}

library(dplyr)
library(tidyverse)
library(Tmisc)

# Load the `quartet` data frame into the workspace.
data(quartet)

# View() opens a data viewer window, which is only useful (and only reliably
# available) in an interactive session.
if (interactive()) {
  View(quartet)
}
# Now the data can be summarized per set using basic statistics
# (mean, standard deviation, and correlation).
# Per-set summary statistics: mean and standard deviation of x and y, plus
# the correlation between them. Each output column is named explicitly;
# unnamed expressions would otherwise become awkward column names such as
# `mean(x)`.
quartet %>%
  group_by(set) %>%
  summarize(
    mean_x = mean(x),
    sd_x = sd(x),
    mean_y = mean(y),
    sd_y = sd(y),
    cor_xy = cor(x, y)
  )
# So we observe that, based on the statistical summaries we have created,
# these datasets appear identical.
# But sometimes just looking at summarized data can be misleading,
# so we use plots to clearly see the difference.
# List the column names of the quartet data.
names(quartet)

# Scatter plot of y against x with a linear-model fit line (no confidence
# band), drawn in a separate panel for each set.
ggplot(data = quartet, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  facet_wrap(~set)
# One more cool thing is the datasauRus package.
# Install datasauRus only when it is missing, so re-running the script does
# not reinstall the package (or fail when no CRAN mirror is configured).
if (!requireNamespace("datasauRus", quietly = TRUE)) {
  install.packages("datasauRus")
}
library("datasauRus")

# Scatter plot of every dataset in `datasaurus_dozen`, one panel per dataset,
# coloured by dataset, with axes and legend removed.
ggplot(datasaurus_dozen, aes(x = x, y = y, color = dataset)) +
  geom_point() +
  theme_void() +
  theme(legend.position = "none") +
  facet_wrap(~dataset)