# Loading packages, data, etc####
# Install Hmisc only when it is missing; an unconditional install.packages()
# would re-download and reinstall the package on every run of the script.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
pacman::p_load(pacman, tidyverse, openxlsx, Hmisc)

# Read the spreadsheet from the working directory and inspect it.
phish <- read.xlsx("phishing.xlsx")
View(phish)

# Lower-case the column names so later code can refer to them consistently.
names(phish) <- str_to_lower(names(phish))

# Keep only the columns used in the correlation and regression steps below.
phish1 <- phish %>%
  select(age:victim, awareness:memory, practice, skill, result)
View(phish1)
str(phish1)
# Running correlation for all columns####
# rcorr() works on a matrix, so the data frame is converted first.
p1 <- rcorr(as.matrix(phish1))
p1
# Specific correlation between two variables ####
p2 <- cor.test(phish1$result, phish1$awareness)
class(p2) # shows htest class
names(p2) # components available on the test result
p2$p.value
names(phish1)
# Linear model code####
# Regress result on the predictors listed in the formula.
# NOTE(review): `freqency` is presumably the column's spelling in the
# spreadsheet -- confirm against names(phish1) before renaming it.
model <- lm(
  result ~ age + gender + edu + tech + income + familysize + hours + years +
    freqency + victim + awareness + vigilance + numeric + alpha + memory +
    practice + skill,
  data = phish1
)
model
summary(model)
class(model) # gives class lm, stands for linear model
names(model)
# Creating a plot####
# Each ggplot object is wrapped in print() so that it is actually drawn on the
# open png device. Without it the plot only appears when R auto-prints
# top-level values; under source() with default arguments nothing is drawn
# and the PNG file comes out blank.
png("awareness.png")
print(
  phish1 %>%
    ggplot(aes(awareness, result)) +
    geom_point() +
    stat_smooth(method = lm) # lm for linear model
)
dev.off()
file.show("awareness.png")
# Plot for age####
png("age.png")
print(
  phish1 %>%
    ggplot(aes(age, result)) +
    geom_point() +
    stat_smooth(method = lm) # lm for linear model
)
dev.off()
file.show("age.png")
# Making Predictions####
# Both calls return a matrix with columns fit, lwr and upr for the rows the
# model was fitted on; only the kind of interval differs.
predict1 <- predict(object = model, interval = "prediction")
predict2 <- predict(object = model, interval = "confidence")
View(predict1)
View(predict2) # lwr and upr are much closer to fit than in predict1
# New phish , adding columns####
# Column-bind the fit/lwr/upr columns onto the data used for the model.
newphish1 <- cbind(phish1, predict1)
newphish2 <- cbind(phish1, predict2)
View(newphish1)
View(newphish2)
# Plotting ####
# Scatter of result against awareness with a fitted lm line, plus the lwr/upr
# bounds from the full model drawn as dashed red lines. Each plot is wrapped
# in print() so it is rendered to the png device even when the script is run
# through source(), which does not auto-print top-level values by default.
png("p1.png")
print(
  newphish1 %>%
    ggplot(aes(awareness, result)) +
    geom_point() +
    stat_smooth(method = lm) +
    geom_line(aes(y = lwr), color = "Red", linetype = "dashed") +
    geom_line(aes(y = upr), color = "Red", linetype = "dashed")
)
dev.off()
file.show("p1.png")

# Same plot using the confidence-interval bounds.
png("p2.png")
print(
  newphish2 %>%
    ggplot(aes(awareness, result)) +
    geom_point() +
    stat_smooth(method = lm) +
    geom_line(aes(y = lwr), color = "Red", linetype = "dashed") +
    geom_line(aes(y = upr), color = "Red", linetype = "dashed")
)
dev.off()
file.show("p2.png")
# Only result with awareness####
# New model with only awareness and result. The formula uses bare column names
# with `data = phish1` rather than `phish1$result ~ phish1$awareness`: the
# fitted values are the same, but the coefficient is named `awareness` and
# predict() can later be given a `newdata` data frame with an `awareness`
# column.
model1 <- lm(result ~ awareness, data = phish1)
model1

# Fitted values with prediction and confidence bounds (columns fit, lwr, upr).
predict3 <- predict(model1, interval = "prediction")
View(predict3)
predict4 <- predict(model1, interval = "confidence")
View(predict4)

# Attach the bounds to the original data for plotting.
newphish3 <- cbind(phish1, predict3)
newphish4 <- cbind(phish1, predict4)
View(newphish3)
View(newphish4)
# New plots####
# Scatter of result against awareness with a fitted lm line, plus the lwr/upr
# bounds from the awareness-only model drawn as dashed red lines. Each plot is
# wrapped in print() so it is rendered to the png device even when the script
# is run through source(), which does not auto-print top-level values by
# default.
png("p3.png")
print(
  newphish3 %>%
    ggplot(aes(awareness, result)) +
    geom_point() +
    stat_smooth(method = lm) +
    geom_line(aes(y = lwr), color = "Red", linetype = "dashed") +
    geom_line(aes(y = upr), color = "Red", linetype = "dashed")
)
dev.off()
file.show("p3.png")

# Same plot using the confidence-interval bounds.
png("p4.png")
print(
  newphish4 %>%
    ggplot(aes(awareness, result)) +
    geom_point() +
    stat_smooth(method = lm) +
    geom_line(aes(y = lwr), color = "Red", linetype = "dashed") +
    geom_line(aes(y = upr), color = "Red", linetype = "dashed")
)
dev.off()
file.show("p4.png")
# Viewing Outliers ####
# Rows with awareness above 9 but result below 5.
print(filter(phish1, awareness > 9, result < 5))
index <- which(phish1$awareness > 9 & phish1$result < 5) # row positions
View(index)
outlier <- phish1[index, ]
View(outlier)
# These respondents reported high awareness yet still scored low results.