An R companion to Statistics: data analysis and modelling

can’t

# Install the sdamr package (and the remotes helper used for the GitHub
# install) only when they are not already available, so that re-running this
# script does not re-download packages on every run.
if (!requireNamespace("sdamr", quietly = TRUE)) {
  install.packages("sdamr")
}
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
# Install sdamr from its GitHub repository via the remotes package.
remotes::install_github("mspeekenbrink/sdam-r")
# Attach sdamr so its functions and data sets can be used without a prefix.
library(sdamr)
# R as a calculator: each top-level expression is evaluated and its value is
# printed. The lines starting with `##` show the printed result.
2 + 11 # addition
## [1] 13
2 * 11 # multiplication
## [1] 22
2 / 11 # division
## [1] 0.1818182
2^11 # exponentiation
## [1] 2048
sqrt(2) # square root
## [1] 1.414214
2^(1 / 2) # another way to compute the square root
## [1] 1.414214
# Without parentheses the multiplication is carried out before the addition.
2 + 11 * 3
## [1] 35
# Parentheses force the addition to be carried out first.
(2 + 11) * 3
## [1] 39
# Attach sdamr, then load the fifa2010teams data set into the workspace.
library("sdamr")
data(list = "fifa2010teams")
# Show the first six rows (the default for head()).
head(fifa2010teams, n = 6)
##   nr         team matches_played goals_for goals_scored goals_against
## 1  1     Germany               7        16           16             5
## 2  2 Netherlands               7        12           11             6
## 3  3     Uruguay               7        11           11             8
## 4  4   Argentina               5        10            9             6
## 5  5      Brazil               5         9            9             4
## 6  6       Spain               7         8            8             2
##   penalty_goal own_goals_for yellow_cards indirect_red_cards direct_red_cards
## 1            0             0           13                  0                0
## 2            0             0           24                  0                0
## 3            1             0           11                  0                1
## 4            0             0            7                  0                0
## 5            0             0            9                  0                1
## 6            0             0            8                  0                0
# Per-column summary: quartiles and mean for numeric columns, length and
# class for the character column.
summary(object = fifa2010teams)
##        nr            team           matches_played   goals_for     
##  Min.   : 1.00   Length:32          Min.   :3.00   Min.   : 0.000  
##  1st Qu.: 8.75   Class :character   1st Qu.:3.00   1st Qu.: 2.000  
##  Median :16.50   Mode  :character   Median :3.50   Median : 3.000  
##  Mean   :16.50                      Mean   :4.00   Mean   : 4.531  
##  3rd Qu.:24.25                      3rd Qu.:4.25   3rd Qu.: 5.250  
##  Max.   :32.00                      Max.   :7.00   Max.   :16.000  
##   goals_scored    goals_against     penalty_goal    own_goals_for
##  Min.   : 0.000   Min.   : 1.000   Min.   :0.0000   Min.   :0    
##  1st Qu.: 2.000   1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.:0    
##  Median : 3.000   Median : 5.000   Median :0.0000   Median :0    
##  Mean   : 4.469   Mean   : 4.531   Mean   :0.2812   Mean   :0    
##  3rd Qu.: 5.250   3rd Qu.: 5.250   3rd Qu.:0.2500   3rd Qu.:0    
##  Max.   :16.000   Max.   :12.000   Max.   :2.0000   Max.   :0    
##   yellow_cards    indirect_red_cards direct_red_cards
##  Min.   : 2.000   Min.   :0          Min.   :0.0000  
##  1st Qu.: 6.000   1st Qu.:0          1st Qu.:0.0000  
##  Median : 7.500   Median :0          Median :0.0000  
##  Mean   : 8.156   Mean   :0          Mean   :0.2812  
##  3rd Qu.: 9.000   3rd Qu.:0          3rd Qu.:0.2500  
##  Max.   :24.000   Max.   :0          Max.   :2.0000
# c() combines its arguments into a single vector.
c(TRUE, FALSE, TRUE, TRUE) # logical vector
## [1]  TRUE FALSE  TRUE  TRUE
c(3, 4, 802.376) # numeric vector
## [1]   3.000   4.000 802.376
c("Coffee", "now", "please") # character vector
## [1] "Coffee" "now"    "please"
# Mixing types coerces every element to one common type: logical values
# become numbers when combined with a number ...
c(TRUE, FALSE, 12)
## [1]  1  0 12
# ... and everything becomes a character string when a string is present.
c(TRUE, 5.67788, "let's see what happens")
## [1] "TRUE"                   "5.67788"                "let's see what happens"
# Store a vector under a name; seq(10, 20) yields the integers 10 to 20.
my_vector <- c(1, 2, seq(10, 20))
my_vector
##  [1]  1  2 10 11 12 13 14 15 16 17 18 19 20
# Build a matrix with two columns from the integers 1 to 10.
mat <- matrix(seq_len(10), ncol = 2)
mat # matrices are filled column-wise
##      [,1] [,2]
## [1,]    1    6
## [2,]    2    7
## [3,]    3    8
## [4,]    4    9
## [5,]    5   10
mat[, 2] # select the second column (the result is a vector)
## [1]  6  7  8  9 10
mat[3, 1] # select the value in the third row and first column
## [1] 3
# Build a factor from integer codes; the labels are matched to the sorted
# unique codes (1 -> "red", 2 -> "green", 3 -> "blue").
fact <- factor(c(1, 2, 2, 3), labels = c("red", "green", "blue"))
fact # display it
## [1] red   green green blue 
## Levels: red green blue
fact == "green" # determine which elements equal (==) 'green'
## [1] FALSE  TRUE  TRUE FALSE
# A named list can hold objects of different kinds, here a matrix and a factor.
lst <- list(a = mat, b = fact)
lst
## $a
##      [,1] [,2]
## [1,]    1    6
## [2,]    2    7
## [3,]    3    8
## [4,]    4    9
## [5,]    5   10
## 
## $b
## [1] red   green green blue 
## Levels: red green blue
# Elements can be extracted by name with `$` ...
lst$a
##      [,1] [,2]
## [1,]    1    6
## [2,]    2    7
## [3,]    3    8
## [4,]    4    9
## [5,]    5   10
lst$b
## [1] red   green green blue 
## Levels: red green blue
# ... or by position with `[[ ]]`; the first element is the matrix `a`.
lst[[1]]
# A data frame combines equal-length columns, which may differ in type.
my_data_frame <- data.frame(
  var1 = 1:10,
  var2 = 10:1,
  var3 = rep(c("a", "b"), times = 5)
)
my_data_frame
##    var1 var2 var3
## 1     1   10    a
## 2     2    9    b
## 3     3    8    a
## 4     4    7    b
## 5     5    6    a
## 6     6    5    b
## 7     7    4    a
## 8     8    3    b
## 9     9    2    a
## 10   10    1    b
# Columns of length 10 and 11 cannot be combined, so this call signals an
# error. It is wrapped in try() so the error is still reported but does not
# stop the rest of the script from running.
try(data.frame(var1 = 1:10, var2 = 1:11))
# Shorter columns are recycled when their length divides the longest one:
# var2 (length 5) is repeated twice and var3 (length 1) ten times.
data.frame(var1 = 1:10, var2 = 1:5, var3 = 1)
##    var1 var2 var3
## 1     1    1    1
## 2     2    2    1
## 3     3    3    1
## 4     4    4    1
## 5     5    5    1
## 6     6    1    1
## 7     7    2    1
## 8     8    3    1
## 9     9    4    1
## 10   10    5    1
# Descriptive statistics for the goals_for column of fifa2010teams.
# Measures of location:
mean(fifa2010teams[["goals_for"]])
## [1] 4.53125
median(fifa2010teams[["goals_for"]])
## [1] 3
# Inter-quartile range, computed with quantile algorithm type 1.
IQR(fifa2010teams[["goals_for"]], type = 1)
## [1] 3
# sample_var(), sample_sd() and sample_mode() are not base R functions;
# presumably they come from the sdamr package (verify against its docs).
sample_var(fifa2010teams[["goals_for"]])
## [1] 13.24902
sample_sd(fifa2010teams[["goals_for"]])
## [1] 3.639921
sample_mode(fifa2010teams[["goals_for"]])
## [1] 3
# --- Histograms ---------------------------------------------------------
# Base R histogram with default settings.
hist(fifa2010teams$goals_for)
# Same histogram with a custom title, axis label and number of breaks.
# The variable holds goals (not points), so the title says so.
hist(
  fifa2010teams$goals_for,
  main = "Histogram of goals by teams in the FIFA 2010 World Cup",
  xlab = "Goals for",
  breaks = 20
)
# ggplot2 version; without `bins`/`binwidth` ggplot2 emits the message below.
library(ggplot2)
ggplot(fifa2010teams, aes(x = goals_for)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# The x axis shows goals_for, which differs from the separate goals_scored
# column, so it is labelled "Goals for" (matching the base R plot above).
ggplot(fifa2010teams, aes(x = goals_for)) +
  geom_histogram(bins = 10, colour = "black", fill = "#8C8279") +
  xlab("Goals for")

# --- Boxplots -----------------------------------------------------------
boxplot(fifa2010teams$goals_for)
ggplot(fifa2010teams, aes(y = goals_for)) + geom_boxplot()
# An empty x aesthetic plus a narrower box and a blank x-axis label.
ggplot(fifa2010teams, aes(x = "", y = goals_for)) +
  geom_boxplot(width = .2) +
  xlab("")

# --- Scatterplots -------------------------------------------------------
plot(x = fifa2010teams$matches_played, y = fifa2010teams$goals_for)
ggplot(fifa2010teams, aes(x = matches_played, y = goals_for)) + geom_point()

# --- Raincloud plot -----------------------------------------------------
# Fix the random seed first; presumably plot_raincloud() uses random jitter
# (verify against the sdamr documentation).
set.seed(467)
plot_raincloud(data = fifa2010teams, y = goals_for)
## Warning: Removed 1 rows containing missing values (`geom_segment()`).

# --- Namespaces ---------------------------------------------------------
# Attaching dplyr masks some functions from stats and base, as reported in
# the start-up messages below.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

An R companion to Statistics: data analysis and modelling

Chapter 1 Introduction

1.1 What is R?¹

1.2 Getting started

1.2.1 Download R

1.2.2 Download RStudio

1.3 Working with RStudio

1.4 Installing packages

1.4.1 Installing the `sdamr` package

1.4.2 Loading packages

1.5 Getting help

1.6 First steps: R as a calculator

1.7 Data

1.7.1 Data types

1.7.2 Objects

1.7.3 Importing data

1.8 Exploring data: Descriptive statistics

1.9 Exploring data: Creating plots

1.9.1 Histogram

1.9.2 Boxplot

1.9.3 Scatterplot

1.9.4 Raincloud plot

1.10 A note about namespaces and loading packages

References

Chapter 1 Introduction

1.1 What is R?¹

1.2 Getting started

1.2.1 Download R

1.2.2 Download RStudio

1.3 Working with RStudio

1.4 Installing packages

1.4.1 Installing the `sdamr` package

1.4.2 Loading packages

1.5 Getting help

1.6 First steps: R as a calculator

1.7 Data

1.7.1 Data types

1.7.2 Objects

1.7.3 Importing data

1.8 Exploring data: Descriptive statistics

1.9 Exploring data: Creating plots

1.9.1 Histogram

1.9.2 Boxplot

1.9.3 Scatterplot

1.9.4 Raincloud plot

1.10 A note about namespaces and loading packages

References

1.1 What is R?¹

1.4.1 Installing the `sdamr` package