if statements within for loops
# Use a loop to print integers from 1 to 100
for (i in seq_len(100)) {
  # Show the current integer
  print(i)
}
# Use a loop to add the integers from 1 to 100
# Start the running total at zero, then fold each integer into it
sum_total <- 0
for (i in seq_len(100)) {
  sum_total <- i + sum_total
}
# Create a function showstat that returns a summary statistic from a vector
#
# Arguments:
#   x      A numeric vector.
#   what   Which statistic to compute: "mean", "sd", or "median".
#   na.rm  Should missing values be dropped before computing the statistic?
#
# Returns the requested statistic. Stops with an error when `what` is not one
# of the three supported names.
showstat <- function(x,
                     what = "mean",
                     na.rm = TRUE) {
  # Reject unknown statistics up front, before any computation happens
  if (!(what %in% c("mean", "sd", "median"))) {
    stop(paste("Sorry I don't know what", what, "means. Please try 'mean', 'sd', or 'median'"))
  }

  # Exactly one branch matches, and its value is the function's result
  switch(what,
    mean = mean(x, na.rm = na.rm),
    sd = sd(x, na.rm = na.rm),
    median = median(x, na.rm = na.rm)
  )
}
# Create a function is.even that tests if an input is even or not
is.even <- function(x,                      # The number to test
                    print.result = FALSE) { # Should the result be printed?
  # x is even exactly when halving it leaves a whole number
  half <- x / 2
  even.test <- round(half, 0) == half

  # Optionally announce the verdict before handing it back
  if (print.result) {
    verdict <- if (even.test == TRUE) "x is even!" else "x is NOT even!"
    message(verdict)
  }

  return(even.test)
}
# Try our new function!
is.even(2)
is.even(3)
is.even(2, print.result = TRUE)
is.even(3, print.result = TRUE)
R
folder in your project working directory as wpa_9_LastFirst.R
, where Last and First are your last and first names.message()
works similarly to print()
):for (i in __:__) {
message(__)
}
# Print the integers from 1 to 50 as messages
for (i in seq_len(50)) {
  message(i)
}
# Print the squares of the integers from 1 to 50 as messages
for (i in seq_len(50)) {
  squared <- i ^ 2
  message(squared)
}
Question | original_score | cumulative_score |
---|---|---|
1 | 0 | 0 |
2 | 1 | 1 |
3 | 0 | 1 |
4 | 1 | 2 |
5 | 1 | 3 |
6 | 2 | 5 |
7 | 0 | 5 |
8 | 1 | 6 |
9 | 1 | 7 |
10 | 2 | 9 |
Using the following loop, create the vector cumulative_scores
that contains the cumulative sum of the original scores.
# REPLACE __ WITH THE CORRECT VALUES!
# Vector of original scores on the 10 questions
original_scores <- c(0, 1, 0, 1, 1, 2, 0, 1, 1, 2)
# Create a vector of NAs where the cumulative scores will go
cumulative_scores <- rep(NA, 10)
# Loop over each of the 10 questions
for(i in __:__) {
# Calculate the cumulative sum for the current question
cumsum_i <- sum(original_scores[1:__])
# Add the value to the ith element in cumulative_scores
cumulative_scores[__] <- cumsum_i
}
# print the result!
cumulative_scores
# Vector of original scores on the 10 questions
original_scores <- c(0, 1, 0, 1, 1, 2, 0, 1, 1, 2)
# Start with one empty (NA) slot per question
cumulative_scores <- rep(NA, length(original_scores))
# Visit each question in turn
for (i in seq_along(original_scores)) {
  # Running total: every score up to and including question i,
  # stored in the ith element of cumulative_scores
  cumulative_scores[i] <- sum(original_scores[seq_len(i)])
}
# print the result!
cumulative_scores
## [1] 0 1 1 2 3 5 5 6 7 9
cumsum()
that does exactly what your loop does! Try applying cumsum()
to the original scores and see if you get the same answer as your loop.cumsum(original_scores)
## [1] 0 1 1 2 3 5 5 6 7 9
Cool! They’re the same!
feed.me()
that takes a string food
as an argument, and prints the sentence “I love to eat food
”.# REPLACE __ WITH THE CORRECT VALUES!
feed.me <- function(___) {
output <- paste0("Yum! I love to eat ", ___)
print(output)
}
# Announce a love for the given food.
# Prints the sentence; print() also hands the sentence back invisibly.
feed.me <- function(food) {
  print(paste0("Yum! I love to eat ", food))
}
feed.me("apples")
, it should return Yum! I love to eat apples
.feed.me("apples")
## [1] "Yum! I love to eat apples"
feed.me()
function so that if the user specifies “avocados”, then the function returns “NOOOOO, I HATE AVACADOS!”# REPLACE __ WITH THE CORRECT VALUES!
feed.me <- function(___) {
if(__ != "avacados") {
output <- paste0("Yum! I love to eat ", ___)
}
if( __ == __) {
output <- ""
}
print(output)
}
# Announce a love for the given food -- unless that food is avocados.
#
# Arguments:
#   food  A string naming the food.
#
# Prints the resulting sentence; print() also hands it back invisibly.
feed.me <- function(food) {
  # The exercise asks for "avocados" to be rejected, while earlier versions of
  # this function only matched the misspelling "avacados". Accept both so the
  # correctly spelled input is caught and old calls keep working.
  if (food %in% c("avocados", "avacados")) {
    output <- "NOOOOO, I HATE AVACADOS!"
  } else {
    output <- paste0("Yum! I love to eat ", food)
  }
  print(output)
}
my.mean()
that takes a vector x
as an argument, and returns the mean of the vector x
. Don’t use the mean()
function! Use sum()
and length()
!# REPLACE __ WITH THE CORRECT VALUES!
my.mean <- function(___) {
result <- sum(___) / length(___)
return(result)
}
# Mean of the vector x, computed by hand: the total divided by the count
my.mean <- function(x) {
  total <- sum(x)
  n <- length(x)
  return(total / n)
}
my.mean()
function to calculate the mean weights of chicks in the ChickWeight
data frame and compare your results to what you get from using the mean()
function.my.mean(ChickWeight$weight)
## [1] 121.8183
mean(ChickWeight$weight)
## [1] 121.8183
Cool! They’re the same!
how.many.na()
that takes a vector x
as an argument, and returns the number of NA values found in the vector)# REPLACE __ WITH THE CORRECT VALUES!
how.many.na <- function(x) {
output <- sum(is.na(___))
return(___)
}
# Count how many elements of the vector x are NA
how.many.na <- function(x) {
  # TRUE wherever a value is missing; summing counts the TRUEs
  missing_flags <- is.na(x)
  return(sum(missing_flags))
}
how.many.na()
function to the vector x = c(4, 7, 3, NA, NA, 1)
how.many.na(x = c(4, 7, 3, NA, NA, 1))
## [1] 2
ttest.apa()
that takes a numeric vector x
, and scalar H0
as arguments, and returns an APA-style conclusion from a one-sample t-test of x
against the null hypothesis that the true mean of x is H0
.# REPLACE __ WITH THE CORRECT VALUES!
ttest.apa <- function(x, # A vector of data
mu) { # The mean under the null hypothesis
# Store the one-sample ttest in object a
a <- t.test(x = ___, # The vector of data
mu = ___) # The mean under the null hypothesis
df <- a$parameter # Get the degrees of freedom
test.stat <- ___ # Get the test statistic
p.value <- ___ # Get the p-value
# If the test is significant
if(p.value <= ___) {
# Sentence to print for significant result
print(paste0("The test is significant! t(",
df, ") = ", test.stat,
", p = ", p.value,
" (H0 = ", mu, ")"))
}
# If the test is NOT significant...
if(p.value > ___) {
# Sentence to print for significant result
print(___)
}
}
# Report a one-sample t-test of x against mu as an APA-style sentence.
# The sentence is printed; a result counts as significant when p <= 0.05.
ttest.apa <- function(x,    # A vector of data
                      mu) { # The mean under the null hypothesis
  # Run the one-sample t-test and pull out the pieces that get reported
  a <- t.test(x = x, mu = mu)
  df <- a$parameter         # Degrees of freedom
  test.stat <- a$statistic  # Test statistic
  p.value <- a$p.value      # p-value

  # The statistics read the same whichever way the test turns out
  stats.text <- paste0(df, ") = ", test.stat,
                       ", p = ", p.value,
                       " (H0 = ", mu, ")")

  # Sentence to print for a significant result
  if (p.value <= 0.05) {
    print(paste0("The test is significant! t(", stats.text))
  }
  # Sentence to print for a non-significant result
  if (p.value > 0.05) {
    print(paste0("The test is NOT significant! t(", stats.text))
  }
}
Note that I made a few minor changes to the code. Specifically, I replaced the confusing H0 argument with mu. They’re the same thing, but it was confusing before.
ttest.apa()
function to see if the following vector of values has a mean that is significantly different from 10. Then, do another test to see if it’s significantly different from 5.# Is the mean of these values significantly different from 10?
x <- c(6, 8, 12, 6, 8, 3, 5, 3, 7, 0)
ttest.apa(x = c(6, 8, 12, 6, 8, 3, 5, 3, 7, 0),
mu = 5)
## [1] "The test is NOT significant! t(9) = 0.760469100629373, p = 0.466428913025647 (H0 = 5)"
I probably should have chosen an example when the test WAS significant. For example, setting mu = 3 would give a p value of 0.026.
ttest.apa()
called p.sig
that indicates how low a p-value needs to be to be deemed ‘significant’. Then, test your function on the data above, but use the argument p.sig = .001
, which means that a p-value must be below 0.001 to be deemed ‘significant’.ttest.apa <- function(x, # A vector of data
mu, # The mean under the null hypothesis
p.sig) { # Significance threshold
# Store the one-sample ttest in object a
a <- t.test(x = x, # The vector of data
mu = mu) # The mean under the null hypothesis
df <- a$parameter # Get the degrees of freedom
test.stat <- a$statistic # Get the test statistic
p.value <- a$p.value # Get the p-value
# If the test is significant
if(p.value <= p.sig) {
# Sentence to print for significant result
print(paste0("The test is significant! t(",
df, ") = ", test.stat,
", p = ", p.value,
" (H0 = ", mu, ")"))
}
# If the test is NOT significant...
if(p.value > p.sig) {
# Sentence to print for significant result
print(paste0("The test is NOT significant! t(",
df, ") = ", test.stat,
", p = ", p.value,
" (H0 = ", mu, ")"))
}
}
ttest.apa(x = c(6, 8, 12, 6, 8, 3, 5, 3, 7, 0), mu = 5, p.sig = 0.001)
## [1] "The test is NOT significant! t(9) = 0.760469100629373, p = 0.466428913025647 (H0 = 5)"
Again, this example would have been more interesting if we set mu = 3, as it would be significant at the 0.05 level, but not at the 0.01 level. Also, if you don’t like having so many digits for each statistical output, you can round them to, say 3 digits, by wrapping the statistics in the round() function. For example: round(p.value, 3).
confidence_data
.The data should look like this
p1 | p2 | p3 | p4 | p5 | p6 | p7 | p8 | p9 | p10 |
---|---|---|---|---|---|---|---|---|---|
75 | 1000 | 80 | 61 | 67 | 66 | 99 | 85 | 77 | 82 |
81 | 94 | 76 | 70 | 79 | 83 | 80 | 81 | 99 | 75 |
79 | 75 | 89 | 70 | 94 | 77 | 67 | 94 | 83 | 81 |
89 | 88 | 85 | 73 | 70 | 61 | 88 | 89 | 74 | 68 |
81 | 65 | 90 | 84 | 70 | 82 | 88 | 65 | 89 | 88 |
83 | 76 | 70 | 90 | 91 | 76 | 65 | 100 | 82 | 83 |
confidence_data <- read.table("https://raw.githubusercontent.com/ndphillips/IntroductionR_Course/master/assignments/wpa/data/confidence_data.txt")
# REPLACE __ WITH THE CORRECT VALUE(S)!
# Loop over columns
for(i in 1:ncol(__)) {
# Get data from column i
data.i <- confidence_data[, i]
# Count number of responses below 0 or above 100
count_invalid <- sum(data.i < __ | data.i > __)
# Print message
message(paste0("In column ", __, "I found ", __, "values that were either below 0 or over 100"))
}
# Walk through the columns of confidence_data one at a time
n_columns <- ncol(confidence_data)
for (i in 1:n_columns) {
  # Pull out column i
  data.i <- confidence_data[, i]
  # Flag responses that fall outside the valid 0-100 range, then count them
  out_of_range <- data.i < 0 | data.i > 100
  count_invalid <- sum(out_of_range)
  # Report the count for this column
  message("In column ", i, " I found ", count_invalid, " values that were either below 0 or over 100")
}
## In column 1 I found 3 values that were either below 0 or over 100
## In column 2 I found 21 values that were either below 0 or over 100
## In column 3 I found 3 values that were either below 0 or over 100
## In column 4 I found 2 values that were either below 0 or over 100
## In column 5 I found 1 values that were either below 0 or over 100
## In column 6 I found 5 values that were either below 0 or over 100
## In column 7 I found 6 values that were either below 0 or over 100
## In column 8 I found 5 values that were either below 0 or over 100
## In column 9 I found 3 values that were either below 0 or over 100
## In column 10 I found 2 values that were either below 0 or over 100
# REPLACE __ WITH THE CORRECT VALUE(S)!
# Loop over columns
for(i in 1:ncol(__)) {
# Get data from column i
data.i <- confidence_data[,i]
# Count number of responses below 0 or above 100
count_invalid <- sum(data.i < __ | data.i > __)
# Print message
message(paste0("In column ", __, "I found ", __, "values that were either below 0 or over 100"))
# Replace values below 0 or above 100 with NA
data.i[data.i < __ | data.i > __] <- NA
# Assign data.i back to the ith column of the data
__[, i] <- data.i
}
# Loop over columns: report how many responses are invalid, then blank them out
for (i in seq_len(ncol(confidence_data))) {
  # Get data from column i
  data.i <- confidence_data[, i]
  # Flag responses below 0 or above 100 (computed once, used twice below)
  is.invalid <- data.i < 0 | data.i > 100
  # Count number of invalid responses
  count_invalid <- sum(is.invalid)
  # Print message (the spaces around the pasted-in numbers keep it readable)
  message(paste0("In column ", i, " I found ", count_invalid, " values that were either below 0 or over 100"))
  # Replace values below 0 or above 100 with NA
  data.i[is.invalid] <- NA
  # Assign data.i back to the ith column of the data
  confidence_data[, i] <- data.i
}
## In column 1 I found 3 values that were either below 0 or over 100
## In column 2 I found 21 values that were either below 0 or over 100
## In column 3 I found 3 values that were either below 0 or over 100
## In column 4 I found 2 values that were either below 0 or over 100
## In column 5 I found 1 values that were either below 0 or over 100
## In column 6 I found 5 values that were either below 0 or over 100
## In column 7 I found 6 values that were either below 0 or over 100
## In column 8 I found 5 values that were either below 0 or over 100
## In column 9 I found 3 values that were either below 0 or over 100
## In column 10 I found 2 values that were either below 0 or over 100
# Loop over columns again, this time ignoring values that are already NA
for (i in seq_len(ncol(confidence_data))) {
  # Get data from column i
  data.i <- confidence_data[, i]
  # Flag responses below 0 or above 100 (NA wherever the response is missing)
  is.invalid <- data.i < 0 | data.i > 100
  # Count number of invalid responses
  count_invalid <- sum(is.invalid, na.rm = TRUE) # Add na.rm = TRUE to ignore NA values!
  # Print message (the spaces around the pasted-in numbers keep it readable)
  message(paste0("In column ", i, " I found ", count_invalid, " values that were either below 0 or over 100"))
  # Replace values below 0 or above 100 with NA
  data.i[is.invalid] <- NA
  # Assign data.i back to the ith column of the data
  confidence_data[, i] <- data.i
}
## In column 1 I found 0 values that were either below 0 or over 100
## In column 2 I found 0 values that were either below 0 or over 100
## In column 3 I found 0 values that were either below 0 or over 100
## In column 4 I found 0 values that were either below 0 or over 100
## In column 5 I found 0 values that were either below 0 or over 100
## In column 6 I found 0 values that were either below 0 or over 100
## In column 7 I found 0 values that were either below 0 or over 100
## In column 8 I found 0 values that were either below 0 or over 100
## In column 9 I found 0 values that were either below 0 or over 100
## In column 10 I found 0 values that were either below 0 or over 100
Great! No more invalid values! Note that I had to include the argument na.rm = TRUE when defining count_invalid, otherwise count_invalid would be NA if there are any missing values in the data!
# JUST RUN! This will create 100 text files in your working directory
for (i in 1:100) {
  # Draw ten random responses (1-7) for a hypothetical subject
  responses <- sample(1:7, size = 10, replace = TRUE)
  data_temp <- data.frame(subject = rep(i, 10),
                          trial = 1:10,
                          response = responses)
  # Save this subject's data as a tab-separated text file
  subject_file <- paste0("subject_", i, ".txt")
  write.table(data_temp, file = subject_file, sep = "\t")
}
subject_1
, subject_2
, … subject_100
. Run the code, then try looking at some of the objects such as subject_40
and subject_64
.# JUST RUN!
for (i in 1:100) {
  # Read participant i's file into the temporary object data_temp
  subject_file <- paste0("subject_", i, ".txt")
  data_temp <- read.table(file = subject_file, header = TRUE, sep = "\t")
  # Store the data under a name built from the participant number (subject_i)
  object_name <- paste0("subject_", i)
  assign(x = object_name, value = data_temp)
}
rbind()
function. Look at the following code, replace __ with the correct value(s), and then run it to create a new dataframe object called all_data
which contains the data from all participants!# REPLACE __ WITH THE CORRECT VALUE(S)!
# Set up all_data object (starts out empty)
all_data <- NULL
for (i in 1:100) {
# Load the data from participant i into the temporary object data_temp
data_temp <- read.table(file = paste0("subject_", i, ".txt"),
header = TRUE,
sep = "\t")
# Add temporary data to all_data! data_temp is the first argument to rbind(),
# so each participant's rows are placed above the rows collected so far
all_data <- rbind(data_temp, all_data)
}
# Combine the data from all participants into one dataframe, all_data.
# Each file is read into its own slot of a preallocated list and the pieces are
# bound together once at the end. Calling rbind() inside the loop would re-copy
# the growing dataframe on every pass.
n_subjects <- 100
subject_data <- vector("list", n_subjects)
for (i in seq_len(n_subjects)) {
  message(paste("Reading subject", i))
  # Load the data from participant i into its slot in the list
  subject_data[[i]] <- read.table(file = paste0("subject_", i, ".txt"),
                                  header = TRUE,
                                  sep = "\t")
}
# Stack all participants with a single rbind(); rows are in subject order (1, 2, ..., 100)
all_data <- do.call(rbind, subject_data)
# Every file carries its own row labels, so renumber the combined rows 1..n
rownames(all_data) <- NULL
## Reading subject 1
## Reading subject 2
## Reading subject 3
## Reading subject 4
## Reading subject 5
## Reading subject 6
## Reading subject 7
## Reading subject 8
## Reading subject 9
## Reading subject 10
## Reading subject 11
## Reading subject 12
## Reading subject 13
## Reading subject 14
## Reading subject 15
## Reading subject 16
## Reading subject 17
## Reading subject 18
## Reading subject 19
## Reading subject 20
## Reading subject 21
## Reading subject 22
## Reading subject 23
## Reading subject 24
## Reading subject 25
## Reading subject 26
## Reading subject 27
## Reading subject 28
## Reading subject 29
## Reading subject 30
## Reading subject 31
## Reading subject 32
## Reading subject 33
## Reading subject 34
## Reading subject 35
## Reading subject 36
## Reading subject 37
## Reading subject 38
## Reading subject 39
## Reading subject 40
## Reading subject 41
## Reading subject 42
## Reading subject 43
## Reading subject 44
## Reading subject 45
## Reading subject 46
## Reading subject 47
## Reading subject 48
## Reading subject 49
## Reading subject 50
## Reading subject 51
## Reading subject 52
## Reading subject 53
## Reading subject 54
## Reading subject 55
## Reading subject 56
## Reading subject 57
## Reading subject 58
## Reading subject 59
## Reading subject 60
## Reading subject 61
## Reading subject 62
## Reading subject 63
## Reading subject 64
## Reading subject 65
## Reading subject 66
## Reading subject 67
## Reading subject 68
## Reading subject 69
## Reading subject 70
## Reading subject 71
## Reading subject 72
## Reading subject 73
## Reading subject 74
## Reading subject 75
## Reading subject 76
## Reading subject 77
## Reading subject 78
## Reading subject 79
## Reading subject 80
## Reading subject 81
## Reading subject 82
## Reading subject 83
## Reading subject 84
## Reading subject 85
## Reading subject 86
## Reading subject 87
## Reading subject 88
## Reading subject 89
## Reading subject 90
## Reading subject 91
## Reading subject 92
## Reading subject 93
## Reading subject 94
## Reading subject 95
## Reading subject 96
## Reading subject 97
## Reading subject 98
## Reading subject 99
## Reading subject 100
p.values
with 1,000 NA values.p.values
.p.values
with 1,000 p-values.# REPLACE ALL VALUES OF __ WITH THE CORRECT VALUES!
# Create a vector of 1000 NA values
p.values <- rep(NA, ___)
# Loop over all 1000 values
for(i in ___) {
# Generate a random vector of data
x <- rnorm(n = ___, mean = ___, sd = ___)
# Calculate the te.test
result <- t.test(___)$___
# Store p-value from test in p.values
p.values[___] <- ___
}
# Create a vector of 1000 NA values, one slot per simulated study
p.values <- rep(NA, 1000)
# Fill each slot in turn
for (i in seq_along(p.values)) {
  # Generate a random vector of data with a true mean of 0
  x <- rnorm(n = 10, mean = 0, sd = 1)
  # Run the t-test and store its p-value in the ith slot of p.values
  p.values[i] <- t.test(x)$p.value
}
mean(p.values < 0.05)
## [1] 0.06
My answer was pretty close to 0.05! So indeed, the probability of getting a significant result given that the null hypothesis is true is 0.05! You could also play around with the simulation by changing the number of samples (n), and the standard deviation (sd) to see if that has any effect.
survey
contains results from a survey of 5 participants. Each participant was asked 5 questions on a 1-10 Likert scale. As you can see, many of the responses are not valid integers from 1-10. Using a loop, create a new dataframe called survey_corrected
that converts all invalid values to NA:survey <- data.frame("p" = c(1, 2, 3, 4, 5),
"q1" = c(5, 3, 6, 3, 5),
"q2" = c(-1, 4, 3, 6, 11),
"q3" = c(6, 22, 4, 6, -5),
"q4" = c(6, 3, 4, -2, 4),
"q5" = c(1, 1, 900, 1, 2))
# REPLACE ALL VALUES OF __ WITH THE CORRECT VALUES!
survey_corrected <- survey # Copy original survey
for(column.i in ____ ) { # Loop over columns
x <- ____ # Copy original column vector
x[(x %in% ___) == FALSE] <- ___ # Replace any bad values
survey_corrected[,___] <- ___ # Assign x back to survey.correced
}
# Start from a copy of the original survey, then clean it column by column
survey_corrected <- survey
for (column.i in 1:ncol(survey)) {
  # Copy the original column vector
  x <- survey[, column.i]
  # Anything that is not an integer from 1 to 10 is a bad value: make it NA
  is_valid <- x %in% 1:10
  x[!is_valid] <- NA
  # Put the cleaned column back into survey_corrected
  survey_corrected[, column.i] <- x
}
survey_corrected
to make sure it worked!survey_corrected
## p q1 q2 q3 q4 q5
## 1 1 5 NA 6 6 1
## 2 2 3 4 NA 3 1
## 3 3 6 3 4 4 NA
## 4 4 3 6 6 NA 1
## 5 5 5 NA NA 4 2
wpa_9_LastFirst.R
file to me at nathaniel.phillips@unibas.ch.