pacman::p_load(tidyverse,pacman,openxlsx)
# Load the survey responses from the workbook in the working directory.
data <- read.xlsx("Tech_Non_Tech.xlsx")

# Fail early, with a readable message, if a column used by this analysis is
# missing. "Technical.Edcation" is deliberately spelled the way the rest of the
# script refers to it (presumably matching the spreadsheet header -- confirm).
required_cols <- c("Completed", "Technical.Edcation", "Identification")
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "Missing expected column(s): ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Exploration helpers: only meaningful in an interactive session. View() needs
# a GUI/data viewer and would otherwise break a batch (Rscript) run.
if (interactive()) {
  print(dir())
  View(data)
}

nrow(data) # number of rows in the raw data
ncol(data) # number of columns in the raw data

data1 <- data # untouched backup of the raw data

# Keep only the respondents who actually completed the test; everything
# downstream is computed on this filtered data.
data <- data %>%
  filter(Completed == "Yes")
# Counts used to build the 2 x 2 table of technical education vs. phishing
# identification. Each count filters `data` on the relevant "Yes"/"No" values
# and takes the number of remaining rows. filter() keeps only rows where the
# condition is TRUE, so rows with a missing value in either column are dropped.

# Respondents with technical education.
tech_total <- data %>%
  filter(Technical.Edcation == "Yes") %>%
  nrow()
tech_total

# Respondents without technical education.
non_tech_total <- data %>%
  filter(Technical.Edcation == "No") %>%
  nrow()
non_tech_total

# Technical education AND identified the phishing attempt.
tech_identify <- data %>%
  filter(Technical.Edcation == "Yes" & Identification == "Yes") %>%
  nrow()
tech_identify

# Technical education but did NOT identify the phishing attempt.
# Counted explicitly (not as tech_total - tech_identify) so that rows with a
# missing or unexpected Identification value are excluded here exactly as they
# are for the non-technical group below; otherwise the two rows of the
# contingency table would be built by different rules.
tech_no_identify <- data %>%
  filter(Technical.Edcation == "Yes" & Identification == "No") %>%
  nrow()
tech_no_identify

# No technical education but still identified the phishing attempt.
non_tech_identify <- data %>%
  filter(Technical.Edcation == "No" & Identification == "Yes") %>%
  nrow()

# No technical education and did NOT identify the phishing attempt.
non_tech_no_identify <- data %>%
  filter(Technical.Edcation == "No" & Identification == "No") %>%
  nrow()

non_tech_identify
non_tech_no_identify
# 2 x 2 contingency table: rows are the education groups, columns are the
# identification outcomes. matrix() fills column by column, so the first two
# values form the "Could not identify" column and the last two the
# "Could identify" column. The labels are attached at construction so the
# table is already labelled when it is printed.
data2 <- matrix(
  c(tech_no_identify, non_tech_no_identify, tech_identify, non_tech_identify),
  nrow = 2,
  dimnames = list(
    c("With technical education", "Without technical education"),
    c("Could not identify", "Could identify")
  )
)
data2

# Chi-squared test of independence between technical education and the ability
# to identify phishing (both variables are categorical: yes / no).
# Note: for a 2 x 2 table chisq.test() applies Yates' continuity correction by
# default (correct = TRUE).
model <- chisq.test(data2)
model

# Interpretation: the null hypothesis is that identification is independent of
# technical education. A p-value >= 0.05 means we FAIL TO REJECT the null
# hypothesis (no evidence of an association); a p-value < 0.05 means we reject
# it. The original analysis noted a p-value greater than 0.05 for this data,
# i.e. no significant association -- re-check `model` if the data changes.
# Repeat the test on a data-frame version of the same contingency table.
data3 <- as.data.frame(data2) # same counts and labels, stored as a data frame

# Print both objects side by side for comparison.
data2
data3
# The matrix and the data frame display the same labelled 2 x 2 layout.

# chisq.test() also accepts a data frame (it is converted to a matrix
# internally), so the result matches the one obtained from `data2`.
model1 <- chisq.test(data3)
model1
# Same inference as for `model` above.
# Recommended book: "Statistics for People Who (Think They) Hate Statistics" by Neil J. Salkind