This is the exercise for lecture 2. The code is copied from: * https://rpubs.com/koushikstat/167274 * https://www.mailman.columbia.edu/sites/default/files/media/fdawg_ggplot2.html
library("pheatmap")
library("vegan")
library("tidyverse")
library(reshape2)
# Load the abundance table from the working directory.
healthy <- read.table(file = "myoviridae_healthy.txt")

# Apply vegan's "hellinger" standardisation before plotting.
healthy_hellinger <- decostand(healthy, method = "hellinger")

# Heatmap of the transformed table; column clustering is switched off,
# and each cell is drawn as an 8 x 8 square.
pheatmap(
  healthy_hellinger,
  main = "Healthy",
  cellheight = 8,
  cellwidth = 8,
  cluster_cols = FALSE
)
Description of iris dataset from wiki
# Show the first six rows of the built-in iris data set.
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Show the last six rows of the built-in iris data set.
tail(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Default plot method for a data frame, applied to all iris columns.
plot(x = iris)
# Boxplot of sepal length per species, with the group mean overlaid.
box <- ggplot(iris, aes(x = Species, y = Sepal.Length))

box +
  geom_boxplot(aes(fill = Species)) +
  ylab("Sepal Length") +
  ggtitle("Iris Boxplot") +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0);
  # the mean of each group is drawn as an open diamond (shape 5).
  stat_summary(fun = mean, geom = "point", shape = 5, size = 4)
# Reshape iris to long format: one row per (Species, variable, value),
# keeping Species as the identifier column.
iris2 <- melt(data = iris, id.vars = "Species")

# Preview the first three rows of the long table.
iris2[seq_len(3), ]
## Species variable value
## 1 setosa Sepal.Length 5.1
## 2 setosa Sepal.Length 4.9
## 3 setosa Sepal.Length 4.7
# Dodged bar chart of the long-format iris data: the four measurements sit
# side by side within each species, with a hand-picked fill palette and
# human-readable legend labels.
bar1 <- ggplot(
  data = iris2,
  mapping = aes(x = Species, y = value, fill = variable)
) +
  geom_bar(position = "dodge", stat = "identity") +
  scale_fill_manual(
    name = "Iris\nMeasurements",
    values = c("orange", "blue", "darkgreen", "purple"),
    breaks = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"),
    labels = c("Sepal Length", "Sepal Width", "Petal Length", "Petal Width")
  )

# Render the chart.
bar1
# Base scatterplot of sepal width against sepal length; colour and point
# shape both encode Species.
smooth <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(mapping = aes(shape = Species), size = 1.5) +
  xlab("Sepal Length") +
  ylab("Sepal Width") +
  ggtitle("Scatterplot with smoothers")

# Smoother fitted with method "lm" (linear model)
smooth + geom_smooth(method = "lm")

# Smoother fitted with method "loess" (local polynomial regression)
smooth + geom_smooth(method = "loess")
# Scatterplot with an "lm" smoother, reused below for two facet layouts.
facet <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(mapping = aes(shape = Species), size = 1.5) +
  geom_smooth(method = "lm") +
  xlab("Sepal Length") +
  ylab("Sepal Width") +
  ggtitle("Faceting")

# Species on the right-hand side of the formula: panels laid out along one row
facet + facet_grid(. ~ Species)

# Species on the left-hand side of the formula: panels stacked in one column
facet + facet_grid(Species ~ .)
Read table
# Read the survey answers; the first line of the file supplies column names.
tb <- read_csv(file = "Survey2.csv", col_names = TRUE)
## Parsed with column specification:
## cols(
## Timestamp = col_character(),
## Sex = col_character(),
## Level = col_character(),
## Field = col_character(),
## Species = col_character(),
## Organism = col_character(),
## Degree = col_character(),
## Programming_Exp = col_character(),
## NGS_Exp = col_character(),
## work_NGS = col_character(),
## Experiment_type = col_character(),
## Who_collect = col_character(),
## Who_wet = col_character(),
## Who_dry = col_character(),
## NGS_machines = col_character(),
## NGS_machine_type = col_character(),
## Like_lectures = col_character(),
## Dislike_lectures = col_character()
## )
# Number of survey rows per Sex.
ggplot(data = tb, mapping = aes(x = Sex)) +
  geom_bar() +
  theme_bw()

# Same counts, with each bar split by Level.
ggplot(data = tb, mapping = aes(x = Sex)) +
  geom_bar(mapping = aes(fill = Level)) +
  theme_bw()
Filter out the Undergraduate, Research Institute and Faculties levels, and keep the Faculties rows in a separate table
# `!(...)` negates a condition, so `!(Level %in% c("xxx"))` keeps exactly the
# rows that `Level %in% c("xxx")` would drop.

# tb2: every row whose Level is NOT one of the three listed values.
# The level name is written with a plain space: "\ " is not a valid escape
# sequence in an R string and stops the script from parsing.
tb2 <- tb %>%
  filter(!(Level %in% c("Faculties", "Undergraduate", "Research Institute")))

# tb3: only the rows whose Level is "Faculties".
tb3 <- tb %>%
  filter(Level %in% c("Faculties"))
Student and postdoc species interest
# Split Species cells on "," (plus an optional following space) so each
# listed entry gets its own row, then count entries per species by Level.
tb2 %>%
  separate_rows(Species, sep = ", ?") %>%
  ggplot(mapping = aes(x = Species)) +
  geom_bar(mapping = aes(fill = Level)) +
  coord_flip() +
  theme_bw()
Student and postdoc with TIGP degree species interest
# Same species tally as above, restricted to rows whose Degree is "TIGP".
tb2 %>%
  separate_rows(Species, sep = ", ?") %>%
  filter(Degree == "TIGP") %>%
  ggplot(mapping = aes(x = Species)) +
  geom_bar(mapping = aes(fill = Level)) +
  coord_flip() +
  theme_bw()
`table` is a handy function for summarising a column
# Frequency of each distinct NGS_Exp answer in tb2.
table(tb2[["NGS_Exp"]])
##
## Extensive None Very little
## 11 7 18
# Split Dislike_lectures cells on "," (plus an optional following space) so
# each listed lecture gets its own row, then count mentions per lecture,
# coloured by Level. The former empty `filter()` step kept every row and has
# been removed.
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(aes(Dislike_lectures)) +
  geom_bar(aes(fill = Level)) +
  coord_flip() +
  theme_bw()
# Disliked-lecture counts for tb2, one facet row per NGS_Exp answer.
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(mapping = aes(x = Dislike_lectures)) +
  geom_bar(mapping = aes(fill = Level)) +
  facet_grid(NGS_Exp ~ .) +
  coord_flip() +
  theme_bw()
# Disliked-lecture counts for the Faculties-only table (tb3).
tb3 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(mapping = aes(x = Dislike_lectures)) +
  geom_bar(mapping = aes(fill = Level)) +
  coord_flip() +
  theme_bw()

# Same tally for tb3, one facet row per NGS_Exp answer.
tb3 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(mapping = aes(x = Dislike_lectures)) +
  geom_bar(mapping = aes(fill = Level)) +
  facet_grid(NGS_Exp ~ .) +
  coord_flip() +
  theme_bw()
# Disliked-lecture counts for tb2 rows whose Degree is "TIGP".
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  filter(Degree == "TIGP") %>%
  ggplot(mapping = aes(x = Dislike_lectures)) +
  geom_bar(mapping = aes(fill = Level)) +
  coord_flip() +
  theme_bw()

# Disliked-lecture counts for all of tb2, one facet column per Degree value.
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(mapping = aes(x = Dislike_lectures)) +
  geom_bar(mapping = aes(fill = Level)) +
  facet_grid(. ~ Degree) +
  coord_flip() +
  theme_bw()
# Liked-lecture counts for tb2, coloured by Level.
tb2 %>%
  separate_rows(Like_lectures, sep = ", ?") %>%
  ggplot(mapping = aes(x = Like_lectures)) +
  geom_bar(mapping = aes(fill = Level)) +
  coord_flip() +
  theme_bw()

# The same tally as a table: number of rows per liked lecture.
tb2 %>%
  separate_rows(Like_lectures, sep = ", ?") %>%
  group_by(Like_lectures) %>%
  summarise(n = n())
## # A tibble: 12 x 2
## Like_lectures n
## <chr> <int>
## 1 Amplicon / Metagenomic 17
## 2 Basic usage of Linux and R ; Practical I: R 24
## 3 Comparative Genomics 19
## 4 DNA/RNA preparation and different sequencing technologies 15
## 5 From sequence to alignment to phylogenies 16
## 6 Genome Assembly 22
## 7 Mapping and Case studies 17
## 8 Population Genomics 16
## 9 Practical II: Assembly and Mapping 13
## 10 Practical III: Sequence alignment + phylogeny 13
## 11 RNAseq: Differential Expression 21
## 12 RNAseq: Genome annotation 22