This is the exercise for lecture 2. The code is adapted from https://rpubs.com/koushikstat/167274 and https://www.mailman.columbia.edu/sites/default/files/media/fdawg_ggplot2.html

1. Import required packages

library("pheatmap") 
library("vegan") 
library("tidyverse")
library(reshape2)

2. Example: Heatmap of RNAseq

# Read the table, apply vegan's Hellinger standardisation, then draw a
# heatmap with row clustering only (column clustering is switched off).
healthy <- read.table(file = "myoviridae_healthy.txt")
healthy_hellinger <- decostand(healthy, method = "hellinger")
pheatmap(
  healthy_hellinger,
  cluster_cols = FALSE,
  cellwidth = 8,
  cellheight = 8,
  main = "Healthy"
)

3. Iris example

Description of iris dataset from wiki

# First six rows of the built-in iris data set.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Last six rows of the same data set.
tail(iris, n = 6L)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

Plots from iris

Plot

# plot() on a data frame with more than two columns draws a matrix of
# pairwise scatterplots.
plot(x = iris)

boxplot

# Boxplot of Sepal.Length for each Species, filled by Species.
# A diamond (shape 5) marks the mean of every group.
box <- ggplot(iris, aes(x = Species, y = Sepal.Length))
box +
  geom_boxplot(aes(fill = Species)) +
  ylab("Sepal Length") +
  ggtitle("Iris Boxplot") +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 5, size = 4)

barplot

# Reshape iris to long format: Species is kept as the id column and every
# other column becomes a (variable, value) pair.
iris2 <- melt(iris, id.vars = "Species")
# Preview the first three rows.
head(iris2, n = 3L)
##   Species     variable value
## 1  setosa Sepal.Length   5.1
## 2  setosa Sepal.Length   4.9
## 3  setosa Sepal.Length   4.7
# Dodged bar chart of the melted measurements, one colour per measurement.
# geom_col() draws bars whose height is the y value itself.
bar1 <- ggplot(iris2, aes(x = Species, y = value, fill = variable)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("orange", "blue", "darkgreen", "purple"),
    name = "Iris\nMeasurements",
    breaks = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"),
    labels = c("Sepal Length", "Sepal Width", "Petal Length", "Petal Width")
  )
bar1

Adding Smoothers

# Base scatterplot of sepal width against sepal length; colour and point
# shape both encode Species. Smoothers are layered on top of it below.
smooth <- ggplot(
  iris,
  aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(aes(shape = Species), size = 1.5) +
  labs(
    x = "Sepal Length",
    y = "Sepal Width",
    title = "Scatterplot with smoothers"
  )

# Add a linear-model fit (one line per Species, because colour is mapped).
smooth + geom_smooth(method = "lm")

Loess smooth

# Same scatterplot, with a loess (local polynomial regression) smoother
smooth +
  geom_smooth(method = "loess")

Faceting

# Scatterplot with a linear fit per Species, used as the base for faceting.
facet <- ggplot(
  iris,
  aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(aes(shape = Species), size = 1.5) +
  geom_smooth(method = "lm") +
  labs(x = "Sepal Length", y = "Sepal Width", title = "Faceting")

# Species on the column side of the formula: panels sit side by side in one row
facet + facet_grid(. ~ Species)

# Species on the row side of the formula: panels are stacked in one column
facet + facet_grid(Species ~ .)

Survey table

Read table

# Load the survey responses; the first row of the file supplies column names.
tb <- read_csv(file = "Survey2.csv", col_names = TRUE)
## Parsed with column specification:
## cols(
##   Timestamp = col_character(),
##   Sex = col_character(),
##   Level = col_character(),
##   Field = col_character(),
##   Species = col_character(),
##   Organism = col_character(),
##   Degree = col_character(),
##   Programming_Exp = col_character(),
##   NGS_Exp = col_character(),
##   work_NGS = col_character(),
##   Experiment_type = col_character(),
##   Who_collect = col_character(),
##   Who_wet = col_character(),
##   Who_dry = col_character(),
##   NGS_machines = col_character(),
##   NGS_machine_type = col_character(),
##   Like_lectures = col_character(),
##   Dislike_lectures = col_character()
## )

Some quick exploration

Number of males versus females

# Bar chart counting the rows of tb for each value of Sex.
ggplot(tb, aes(x = Sex)) +
  geom_bar() +
  theme_bw()

Number of males versus females and categorised into levels

# Same count per Sex, with each bar split (stacked) by Level.
ggplot(tb, aes(x = Sex)) +
  geom_bar(aes(fill = Level)) +
  theme_bw()

Focus

Filter out Undergraduate, Research Institute and Faculties

# Note: !(cond) negates cond, so !(Level %in% c("xxx")) keeps exactly the
# rows that Level %in% c("xxx") would drop.
# tb2: every row whose Level is NOT Faculties, Undergraduate or
# Research Institute.
tb2 <- tb %>%
  filter(!(Level %in% c("Faculties", "Undergraduate", "Research Institute")))

# tb3: only the rows whose Level is "Faculties".
tb3 <- tb %>%
  filter(Level %in% c("Faculties"))

Student and postdoc species interest

# Split the Species column on "," (optionally followed by a space) into one
# row per value, then count each value as horizontal bars filled by Level.
tb2 %>%
  separate_rows(Species, sep = ", ?") %>%
  ggplot(aes(x = Species)) +
  geom_bar(aes(fill = Level)) +
  coord_flip() +
  theme_bw()

Student and postdoc with TIGP degree species interest

# Same species chart, restricted to rows whose Degree is "TIGP".
tb2 %>%
  separate_rows(Species, sep = ", ?") %>%
  filter(Degree == "TIGP") %>%
  ggplot(aes(x = Species)) +
  geom_bar(aes(fill = Level)) +
  coord_flip() +
  theme_bw()

`table()` is a handy function for summarising a column

# Tally how many rows of tb2 fall into each NGS_Exp category.
table(tb2[["NGS_Exp"]])
## 
##   Extensive        None Very little 
##          11           7          18
# Split Dislike_lectures on "," (optionally followed by a space) into one
# row per value, then count each value as horizontal bars filled by Level.
# The empty filter() call was removed: with no conditions it keeps every row.
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(aes(x = Dislike_lectures)) +
  geom_bar(aes(fill = Level)) +
  coord_flip() +
  theme_bw()

# Dislike_lectures counts for tb2, with one facet row per NGS_Exp value.
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(aes(x = Dislike_lectures)) +
  geom_bar(aes(fill = Level)) +
  facet_grid(NGS_Exp ~ .) +
  coord_flip() +
  theme_bw()

# Dislike_lectures counts for tb3 (the Faculties subset).
tb3 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(aes(x = Dislike_lectures)) +
  geom_bar(aes(fill = Level)) +
  coord_flip() +
  theme_bw()

# Dislike_lectures counts for tb3, with one facet row per NGS_Exp value.
tb3 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(aes(x = Dislike_lectures)) +
  geom_bar(aes(fill = Level)) +
  facet_grid(NGS_Exp ~ .) +
  coord_flip() +
  theme_bw()

# Dislike_lectures counts for tb2, restricted to rows whose Degree is "TIGP".
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  filter(Degree == "TIGP") %>%
  ggplot(aes(x = Dislike_lectures)) +
  geom_bar(aes(fill = Level)) +
  coord_flip() +
  theme_bw()

# Dislike_lectures counts for tb2, with one facet column per Degree value.
tb2 %>%
  separate_rows(Dislike_lectures, sep = ", ?") %>%
  ggplot(aes(x = Dislike_lectures)) +
  geom_bar(aes(fill = Level)) +
  facet_grid(. ~ Degree) +
  coord_flip() +
  theme_bw()

# Split Like_lectures on "," (optionally followed by a space) into one row
# per value, then count each value as horizontal bars filled by Level.
tb2 %>%
  separate_rows(Like_lectures, sep = ", ?") %>%
  ggplot(aes(x = Like_lectures)) +
  geom_bar(aes(fill = Level)) +
  coord_flip() +
  theme_bw()

# Number of rows per Like_lectures value after splitting multi-value cells.
# count() is shorthand for group_by() followed by summarise(n = n()).
tb2 %>%
  separate_rows(Like_lectures, sep = ", ?") %>%
  count(Like_lectures)
## # A tibble: 12 x 2
##    Like_lectures                                                 n
##    <chr>                                                     <int>
##  1 Amplicon / Metagenomic                                       17
##  2 Basic usage of Linux and R ; Practical I: R                  24
##  3 Comparative Genomics                                         19
##  4 DNA/RNA preparation and different sequencing technologies    15
##  5 From sequence to alignment to phylogenies                    16
##  6 Genome Assembly                                              22
##  7 Mapping and Case studies                                     17
##  8 Population Genomics                                          16
##  9 Practical II: Assembly and Mapping                           13
## 10 Practical III: Sequence alignment + phylogeny                13
## 11 RNAseq: Differential Expression                              21
## 12 RNAseq: Genome annotation                                    22