#####
# Student roster: read the CSV and practise the basic dplyr verbs ----
pacman::p_load(tidyverse, pacman) # load packages
search() # list what is currently attached
data <- read.csv("stu.csv") # data input (path is relative to the working dir)
View(data)
names(data) # column names as read from the file
n1 <- c("sr", "name", "email", "id", "phone")
names(data) <- n1 # replace the headers with the short names above
print(select(data, name, phone)) # keep only some of the columns
data$id
str_sub(data$id, 1, 4) # first four characters of every id
# data9 is a copy of `data` whose name column is overwritten with the first
# nine characters of the email column.
data9 <- data %>% mutate(name = str_sub(email, 1, 9))
View(data9)
# The four-character id prefix is stored as `year` in both frames.
data9$year <- str_sub(data$id, 1, 4)
arrange(data9, year) # sort rows in increasing order of year (printed only)
data$year <- str_sub(data$id, 1, 4)
data <- arrange(data, year) # keep the sorted result by assigning it back
arrange(select(data, name, year), year)
arrange(select(data, name, year), desc(year)) # decreasing order
arrange(select(data, name, year, phone), year)
# Pick columns first, then keep only the rows that satisfy the condition.
filter(select(data, name, year, phone), year == "2020")
# random functions ----
# sample() draws without replacement unless replace = TRUE is given, so the
# next two calls follow the same rule; the second just spells the default out.
sample(data$name, 5) # 5 names picked at random
sample(data$name, 5, replace = FALSE)
runif(10, 0.0, 10.0) # 10 uniform random doubles between 0 and 10
runif(10, 0, 10)
# One draw per row of the data frame. runif() only falls back to the length of
# its first argument when that length is greater than 1, so the row count is
# passed explicitly instead of the name column.
runif(nrow(data))
# A single string is not a valid count and makes runif() raise an error.
# try() shows that error without stopping the script when it is sourced.
try(runif("yash"))
runif(100) # 100 draws from the default range [0, 1]
?runif
sample(1:100, 10) # 10 distinct random integers between 1 and 100
# Renumber the rows 1..n; seq_len(nrow()) works for any number of rows instead
# of assuming the file has exactly 31.
data$sr <- seq_len(nrow(data))
# play with rows using slice ----
slice_sample(data, n = 5) # 5 rows chosen at random
slice_head(data, n = 3) # first 3 rows
slice_tail(data, n = 3) # last 3 rows
slice_head(data, prop = 0.5) # first half of the rows
slice_sample(data, prop = 0.5) # a randomly chosen half of the rows
View(data)
# Rewrite the first match in each email, three equivalent spellings:
# 1) base-style column assignment: goa -> dubai
data$email <- data$email %>% str_replace("goa", "dubai")
# 2) mutate() with the result stored back: dubai -> pilani
data <- data %>% mutate(email = str_replace(email, "dubai", "pilani"))
# 3) mutate() whose result is only printed, not stored: pilani -> hydrabad
mutate(data, email = str_replace(email, "pilani", "hydrabad"))
# Same as 3) with the pipe and an explicit `.` placeholder:
# data %>% mutate(., email = str_replace(email, "pilani", "hydrabad"))
# playing around with TITANIC data ====
pacman::p_load(tidyverse, pacman)
data <- Titanic # importing the bundled data set
class(data)
data <- as.data.frame(data) # type conversion to a data frame
class(data)
names(data)
levels(data$Sex)
View(data)

# Building blocks used below:
#   filter() - keep the rows that satisfy a condition
#   select() - keep the chosen columns
#   sum()    - add up all the numbers
# freq_total() chains the three: it sums the Freq column over the rows of
# `tbl` matching the conditions in `...` (no condition means every row).
freq_total <- function(tbl, ...) {
  sum(select(filter(tbl, ...), Freq))
}

freq_total(data, Survived == "Yes" & Sex == "Female")
freq_total(data) # total passengers
freq_total(data, Survived == "Yes")
freq_total(data, Survived == "Yes" & Sex == "Male")
# Survivors per class
freq_total(data, Survived == "Yes" & Class == "Crew")
freq_total(data, Survived == "Yes" & Class == "1st")
freq_total(data, Survived == "Yes" & Class == "2nd")
freq_total(data, Survived == "Yes" & Class == "3rd")
# Non-survivors per class
freq_total(data, Survived == "No" & Class == "Crew")
freq_total(data, Survived == "No" & Class == "1st")
freq_total(data, Survived == "No" & Class == "2nd")
freq_total(data, Survived == "No" & Class == "3rd")
freq_total(data, Class == "Crew") # everyone in the Crew class
# Bar plot from data ====
# Warm-up: draw four fixed values into bar.png, then open the file.
a1 <- c(10, 20, 15, 30)
png("bar.png")
barplot(a1)
dev.off() # close the png device so the file is complete before it is opened
file.show("bar.png")

# Female survived and male survived plot.
# Expects `data` to be a data frame with Survived, Sex and Freq columns, as
# the filters below use them.
b1 <- data %>%
  filter(Survived == "Yes" & Sex == "Female") %>%
  select(Freq) %>%
  sum()
b2 <- data %>%
  filter(Survived == "Yes" & Sex == "Male") %>%
  select(Freq) %>%
  sum()
a2 <- c(b1, b2)

# Labels and colours are defined once so the bars and the legend always agree.
sex_labels <- c("Female", "Male")
sex_cols <- rainbow(2)

png("bar1.png")
barplot(
  a2,
  names.arg = sex_labels,
  col = sex_cols,
  ylim = c(0, 500),
  main = "Total People Survived",
  xlab = "gender",
  ylab = "Number"
)
legend("topright", sex_labels, fill = sex_cols)
dev.off()
file.show("bar1.png")