# GGPLOT2 ####

library(ggplot2) 

# BODOVY GRAF A KONSTRUKCE ####

# Use the built-in iris data set and inspect its structure.
data <- iris
str(data)

# ggplot() on its own only creates the plot object (data + mapping);
# no layer has been added yet.
ggplot(data, aes(Sepal.Length, Petal.Length))

# Add a layer: draw the observations as points.
ggplot(data, aes(Sepal.Length, Petal.Length)) +
  geom_point()

# Add a second layer: a linear-model fit through the data, drawn in black.
ggplot(data, aes(Sepal.Length, Petal.Length)) +
  geom_point() +
  geom_smooth(method = "lm", col = "black")

# The `+` sign has to sit at the END of a line; starting the next line
# with it results in an error message.
ggplot(data, aes(Sepal.Length, Petal.Length))
# + geom_point() # !!!! not like this

# Mapping a third variable to colour - three ways of writing it

# 1. Everything inside ggplot(): data and mapping are shared by all layers.
ggplot(data = data, mapping = aes(x = Sepal.Length, y = Petal.Length,
                                  color = Species)) +
  geom_point() +
  geom_smooth(method = "lm")


# 2. Data inside ggplot(), the mapping repeated in every layer.
ggplot(data = data) +
  geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length,
                           color = Species)) +
  geom_smooth(method = "lm",
              mapping = aes(x = Sepal.Length, y = Petal.Length,
                            color = Species))

# 3. Everything inside the layers: both data and mapping.
ggplot() +
  geom_point(data = data,
             mapping = aes(x = Sepal.Length, y = Petal.Length,
                           color = Species)) +
  geom_smooth(method = "lm", data = data,
              mapping = aes(x = Sepal.Length, y = Petal.Length,
                            color = Species))

# The following sections build on this scatter plot,
# so it is stored as object p.

p <- ggplot(
  data,
  aes(x = Sepal.Length, y = Petal.Length, color = Species, fill = Species)
) +
  geom_point() +
  geom_smooth(method = "lm")

# Position scales - control the range shown on the axes

p +
  scale_x_continuous(limits = c(0, 8)) + # alternative: xlim()
  scale_y_continuous(limits = c(0, 8))   # alternative: ylim()
# scale_y_continuous(breaks = seq(0, 8, by = 1))  # alternative: breaks in steps of one

# Colour scales

library(RColorBrewer)
display.brewer.all(type = "all")

# Palette taken from RColorBrewer
p +
  scale_color_brewer(palette = "Paired") +
  scale_fill_brewer(palette = "Paired")

# Grey scale - ideal for printing
p +
  scale_color_grey() +
  scale_fill_grey()

# Colours picked by hand
p +
  scale_color_manual(values = c("red", "black", "blue")) +
  scale_fill_manual(values = c("red", "black", "blue"))

# Add-on package with further palettes
library(paletteer)
p <- p +
  scale_color_paletteer_d("ggthemes::excel_Integral") +
  scale_fill_paletteer_d("ggthemes::excel_Integral")

# This last variant is stored back into p and used from here on.
p

# Plot title and axis labels

p <- p +
  labs(
    title = "Petal Length ~ Sepal Length",
    x = "Sepal Length",
    y = "Petal Length"
  )

# Themes

?theme
p <- p +
  theme_bw() +
  theme(
    plot.title = element_text(size = 15, color = "blue"),
    axis.title = element_text(size = 13),
    axis.text = element_text(size = 12)
  )

# element_blank() removes an element from the plot

p + theme(legend.title = element_blank())

# Zoom

# Best option: coord_cartesian() only changes the visible window.
p + coord_cartesian(xlim = c(6, 7), ylim = c(3, 6))
# Scale limits (same as xlim()/ylim()) drop the points outside the range.
p +
  scale_x_continuous(limits = c(6, 7)) +
  scale_y_continuous(limits = c(3, 6))
# Shorthand for the scale limits above - also drops the points.
p + xlim(6, 7) + ylim(3, 6)

# Legend as it appears in the plot

p

# Change the legend title
# (p already carries colour/fill scales, so these calls replace them)

p +
  scale_color_paletteer_d("ggthemes::excel_Integral", name = "Types") +
  scale_fill_paletteer_d("ggthemes::excel_Integral", name = "Types")


# Change the labels of the legend entries

p +
  scale_color_paletteer_d(
    "ggthemes::excel_Integral",
    name = "Types",
    labels = c("SET", "VER", "VIR")
  ) +
  scale_fill_paletteer_d(
    "ggthemes::excel_Integral",
    name = "Types",
    labels = c("SET", "VER", "VIR")
  )

# Legend position, border and font sizes

p +
  theme(
    legend.position = "inside",
    legend.position.inside = c(0.8, 0.2),   # custom legend position
    legend.title = element_text(size = 12), # font size of the legend title
    legend.text = element_text(size = 11),  # font size of the legend entries
    legend.background = element_rect(color = "black")
  )

# Remove the legend title

p + theme(legend.title = element_blank())

# Remove the whole legend

p + theme(legend.position = "none")

# Export to SVG and PNG
# (SVG output needs the svglite package)

?ggsave

# The plot is passed explicitly: without `plot =`, ggsave() writes whatever
# plot was displayed last, which depends on what was run just before.

# Vector graphics

library(svglite)
ggsave("scatter_plot.svg", plot = p)

# Raster graphics
# One file per resolution preset, so the second export does not overwrite
# the first.

ggsave("scatter_plot_print.png", plot = p, dpi = "print")
ggsave("scatter_plot_screen.png", plot = p, dpi = "screen")

# HISTOGRAM ####

# Histogram of petal length with the mean marked by a dashed red line.
ggplot(data = data, mapping = aes(x = Petal.Length)) +
  geom_histogram(color = "black", fill = "lightyellow", alpha = 0.8) +
  ylab("Absolutní četnost") +
  xlab("Délka korunního lístku v cm") +
  ggtitle("Délka korunního lístku kosatců") +
  theme_bw() +
  # A constant xintercept draws exactly one line; mapping the mean inside
  # aes() would draw one identical line per data row.
  geom_vline(xintercept = mean(data$Petal.Length),
             color = "red", linewidth = 1, linetype = "dashed") +
  annotate(geom = "text", x = 4.2, y = 26, label = "Průměr", col = "red")

?geom_vline
?annotate

# Statistical transformations are an alternative to geom layers:
# histogram -> geom_histogram(), alternatively stat_bin()
# the result is the same

ggplot(data = iris, mapping = aes(x = Petal.Length)) +
  stat_bin(color = "black", fill = "lightyellow", alpha = 0.8) +
  ylab("Absolutní četnost") +
  xlab("Délka v cm") +
  ggtitle("Délka okvetních lístků u kosatců") +
  theme_bw() +
  # Single mean line, computed from the same data frame as the plot.
  geom_vline(xintercept = mean(iris$Petal.Length),
             color = "red", linewidth = 1, linetype = "dashed") +
  annotate(geom = "text", x = 4.2, y = 26, label = "Průměr", col = "red")

# Facets - splitting a plot into panels

# All observations in one histogram
ggplot(data, aes(x = Petal.Length)) +
  geom_histogram()

# Same histogram, bars filled by species
ggplot(data, aes(x = Petal.Length, fill = Species)) +
  geom_histogram()

# One panel per species, stacked in three rows, strip labels on the right
ggplot(data, aes(x = Petal.Length, fill = Species)) +
  geom_histogram() +
  facet_wrap(~Species, nrow = 3, strip.position = "right")

# facet_grid() can be used as well, but it is not used here;
# facet_wrap() is the better choice.
ggplot(data, aes(x = Petal.Length, fill = Species)) +
  geom_histogram() +
  facet_grid(Species ~ .)

# QQPLOT ####

# Q-Q plot of petal length for the whole data set
ggplot(data, aes(sample = Petal.Length)) +
  geom_qq(colour = "deepskyblue1") +
  geom_qq_line(linewidth = 0.8) +
  labs(
    title = "Q-Q graf (ověřování normality)",
    x = "Teoretický kvantil",
    y = "Pozorovaný kvantil"
  ) +
  theme_bw()

# The same plot split into one panel per species
ggplot(data, aes(sample = Petal.Length)) +
  geom_qq(colour = "deepskyblue1") +
  geom_qq_line(linewidth = 0.8) +
  labs(
    title = "Q-Q graf (ověřování normality)",
    x = "Teoretický kvantil",
    y = "Pozorovaný kvantil"
  ) +
  theme_bw() +
  facet_wrap(~Species, ncol = 3, strip.position = "top")

# SLOUPCOVY GRAF ####

library(foreign)
# `use.value.labels` is the documented read.spss() argument for turning
# labelled SPSS variables into factors (the previous `value.labels` is not
# an argument of read.spss()).
data <- read.spss("debts.sav", to.data.frame = TRUE, use.value.labels = TRUE)

str(data)
table(data$Education)
table(data$Children)
# Treat Children as a categorical variable for the bar charts below.
data$Children <- factor(data$Children)

# Simple bar chart

ggplot(data, aes(x = Education)) +
  geom_bar()

ggplot(data, aes(x = Education, fill = Education)) +
  geom_bar()

?geom_bar

# Defaults: stat = "count", position = "stack"
# First we look at the position argument.

ggplot(data, aes(x = Education, fill = Education)) +
  geom_bar(position = "stack")

# Swap the x and y axes

ggplot(data, aes(x = Education, fill = Education)) +
  geom_bar(position = "stack") +
  coord_flip()

# Stacked bar chart

ggplot(data, aes(x = Children, fill = Education)) +
  geom_bar(position = "stack")

# 100% stacked bar chart

ggplot(data, aes(x = Children, fill = Education)) +
  geom_bar(position = "fill")

ggplot(data, aes(x = Children, fill = Education)) +
  geom_bar(position = "fill") +
  coord_flip()

# Grouped bar chart

ggplot(data, aes(x = Children, fill = Education)) +
  geom_bar(position = "dodge")

# Now the stat argument - which one to use depends on the data frame

# Simple bar chart

# stat = "count": one row per observation, the bars show the row counts

ggplot(data, aes(x = Education, fill = Education)) +
  geom_bar(stat = "count", position = "stack")

table(data$Education)

# stat = "identity": the bar heights are read from a column (Freq)

data_educ <- read.spss("debts_educ.sav", to.data.frame = TRUE)

ggplot(data_educ, aes(x = Education, y = Freq, fill = Education)) +
  geom_bar(stat = "identity", position = "stack")

# Value labels on the bars

# stat = "identity"

# geom_text()

ggplot(data_educ, aes(x = Education, y = Freq, fill = Education)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(
    aes(label = Freq),
    stat = "identity",
    vjust = 1.5,
    color = "black"
  )

# hjust/vjust = -0.5 places the label above the bar
# hjust/vjust = 1.5 places the label inside the bar, just before its end

# geom_label()

ggplot(data_educ, aes(x = Education, y = Freq, fill = Education)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_label(
    aes(label = Freq),
    stat = "identity",
    vjust = 1.5,
    color = "white",
    show.legend = FALSE
  )

# stat = "count": the label is the count computed by the stat

# geom_text()

ggplot(data, aes(x = Education, fill = Education)) +
  geom_bar(stat = "count", position = "stack") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = 1.5,
    colour = "black"
  )

# geom_label()

ggplot(data, aes(x = Education, fill = Education)) +
  geom_bar(stat = "count", position = "stack") +
  geom_label(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = 1.5,
    colour = "white",
    show.legend = FALSE
  )

# Labels on a 100% stacked bar chart
# (the text layer uses the same "fill" position as the bars)

plot1 <- ggplot(data, aes(x = Children, fill = Education)) +
  geom_bar(stat = "count", position = "fill") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    position = "fill",
    colour = "black",
    vjust = 1,
    size = 3
  ) +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Education ~ Children", y = "Relative frequency") +
  theme_bw()

print(plot1)

# KOLACOVY GRAF ####

table(data$MaritalStatus)

# Pie chart: a single stacked bar (x = 1) drawn in polar coordinates,
# with the counts placed in the middle of each segment.
plot2 <- ggplot(data, aes(x = 1, fill = MaritalStatus)) +
  geom_bar(col = "black") +
  coord_polar(theta = "y") +
  geom_label(
    aes(label = after_stat(count)),
    stat = "count",
    position = position_stack(vjust = 0.5),
    show.legend = FALSE,
    color = "white",
    size = 4
  ) +
  scale_fill_paletteer_d(
    "ggthemes::excel_Red_Violet",
    name = "Marital Status"
  ) +
  ggtitle("Marital Status of Debtors") +
  theme_void()

print(plot2)

# BOXPLOT ####

# Single box plot of Debt (an empty string serves as the only x value)
ggplot(data, aes(x = "", y = Debt)) +
  geom_boxplot()

# Box plot by one grouping variable

ggplot(data, aes(x = Education, y = Debt, fill = Education)) +
  geom_boxplot()

# Box plot by two grouping variables (Education on x, Gender as fill)

ggplot(data, aes(x = Education, y = Debt, fill = Gender)) +
  geom_boxplot()

library(ggpubr) # used for the ANOVA test label below

# Box plot with a dashed line at the overall mean of Debt and the ANOVA
# result printed in the panel.
plot3 <- ggplot(data, aes(x = Education, y = Debt, fill = Education)) +
  geom_boxplot(width = 0.5) +
  scale_fill_brewer(palette = "Set3") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Debt ~ Education", x = "") +
  theme_bw() +
  theme(legend.position = "none") +
  geom_hline(
    yintercept = mean(data$Debt),
    linetype = 2,
    colour = "red",
    linewidth = 1
  ) +
  annotate(
    geom = "text",
    x = 0.7,
    y = 65000,
    label = "Mean",
    colour = "red"
  ) +
  stat_compare_means(method = "anova", label.y = 10) # ANOVA test

print(plot3)

# KORELOGRAM ####

library(ggcorrplot)

# Children was converted to a factor earlier in this script. as.numeric() on
# a factor returns the internal level codes (1, 2, ...), not the original
# values, so the conversion goes through as.character() to recover the
# numbers stored in the level labels.
# NOTE(review): assumes every level of Children is a numeric string - confirm.
data$Children <- as.numeric(as.character(data$Children))

# Correlation matrix, matrix of p-values and the default correlogram
# NOTE(review): assumes the first six columns are all numeric - confirm.
cor(data[1:6])
cor_pmat(data[1:6])
ggcorrplot(cor(data[1:6]))

plot4 <- ggcorrplot(
            cor(data[1:6]), # correlation matrix
            lab = TRUE, # print the correlation coefficients
            type = "lower",  # lower triangle only
            colors = c("#00B2EE", "white", "#FF4040"),  # colour scale
            outline.color = "white",  # outline of the squares
            lab_size = 4,  # size of the coefficient labels
            legend.title = "Correlation", # legend title
            title = "Correlation of Variables",  # plot title
            p.mat = cor_pmat(data[1:6]),  # p-values of the correlation tests
            sig.level = 0.05,  # significance level
            insig = "pch",  # mark non-significant coefficients
            pch = 4,  # marker type - cross
            pch.cex = 14) # marker size

print(plot4)

# SPECIALNI GRAFY ####

library(GGally)

# Matrix of pairwise plots for the first three columns
ggpairs(data, columns = 1:3)

library(ggpubr)

# Scatter plot with marginal box plots

# margin.plot = c("density", "histogram", "boxplot")

ggscatterhist(
  data = data,
  x = "Income",
  y = "Debt",
  margin.plot = "boxplot"
)

# Same plot, coloured and filled by Gender, with larger semi-transparent points
ggscatterhist(
  data = data,
  x = "Income",
  y = "Debt",
  margin.plot = "boxplot",
  fill = "Gender",
  color = "Gender",
  size = 3,
  alpha = 0.6,
  ggtheme = theme_bw()
)

# INTERAKTIVNI GRAFY ####

library(plotly)

# Build a plain ggplot scatter plot and convert it to an interactive widget.
plot5 <- ggplot(data, aes(x = Income, y = Debt)) +
  geom_point()

ggplotly(plot5)

# Correlation coefficient and its significance printed in the plot

ggplot(data, aes(x = Income, y = Debt)) +
  geom_point() +
  stat_cor(label.x.npc = "left", label.y.npc = "bottom")

# KOMBINACE GRAFU ####

# Bar chart
plot1

# Pie chart
plot2

# Box plot
plot3

# Correlogram
plot4

# Scatter plot
plot5

library(patchwork)

# Two plots combined with `+`
plot1 + plot4

# Four plots arranged in two columns
plot1 + plot2 + plot3 + plot4 + plot_layout(ncol = 2)

# Two plots side by side
plot1 | plot5

# Two plots one below the other
plot1 / plot3

# Title, subtitle, caption and tags for the combined figure
plot1 + (plot2 / plot4) +
  plot_annotation(
    title = 'Title for all the plots',
    subtitle = "Subtitle",
    caption = "Caption",
    tag_levels = "A",
    tag_prefix = "Plot "
  )

# Commenting out several lines at once:
# select the lines to comment out -> Ctrl + Shift + C

# SPOJNICOVY GRAF ####

library(ggpmisc) # stat_peaks(), stat_valleys()

head(economics)
# Keep only the observations dated after 2000-01-01, as a plain data frame.
data <- as.data.frame(
  economics[economics$date > as.Date("2000-01-01"), ]
)
head(data)

# Line chart of the unemploy column over time
p <- ggplot(data, aes(x = date, y = unemploy)) +
  geom_line() +
  scale_x_date(date_labels = "%Y-%m", date_breaks = "5 years") +
  #scale_x_date(date_labels = "%Y", date_breaks = "1 years") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Unemployment", x = "", y = "Number of unemployed")

print(p)

# Extra information added with the ggpmisc package:
# a dashed vertical line at 2007-09-15, peaks in blue, valleys in red

p +
  geom_vline(
    xintercept = as.Date("2007-09-15"),
    linetype = 2,
    color = "green",
    lwd = 0.8
  ) +
  stat_peaks(geom = "point", span = NULL, color = "steelblue3", size = 2) +
  stat_peaks(
    geom = "label",
    span = NULL,
    color = "steelblue3",
    angle = 0,
    hjust = -0.1,
    x.label.fmt = "%Y-%m-%d"
  ) +
  stat_valleys(geom = "point", span = NULL, color = "red", size = 2) +
  stat_valleys(
    geom = "label",
    span = NULL,
    color = "red",
    angle = 0,
    hjust = -0.1,
    x.label.fmt = "%Y-%m-%d"
  )

