# DATA ####

# Read the SPSS data set into a data frame
library(foreign)
data <- read.spss(
  file = "hapiness.sav",
  to.data.frame = TRUE
)

# SCATTER PLOT ####
help("plot")
# Index plotted against Vnimani_korupce
plot(
  main = "Zavislost mezi korupci a stestim",
  x = data$Vnimani_korupce,
  y = data$Index
)

# SCATTERPLOT MATRIX ####

# Pairwise scatter plots of columns 4-10; the lower triangle is left empty
pairs(
  x = data[, 4:10],
  lower.panel = NULL
)

# SCATTERPLOT MATRIX WITH FITTED LINES ####

library(car)
# Black points, a red regression line in every panel, smoother switched off
scatterplotMatrix(
  x = data[, 4:10],
  regLine = list(col = "red"),
  smooth = FALSE,
  col = "black"
)

# CORRELATION MATRIX - PEARSON ####

# Correlations of columns 4-10 (cor() defaults to Pearson), rounded to two
# decimals and reduced to the lower triangle so each pair appears only once
korelace <- cor(data[, 4:10])
korelace <- round(korelace, 2)
diag(korelace) <- NA # blank out the diagonal
korelace[upper.tri(korelace)] <- NA # blank out the values above the diagonal

library(gt)
# gt() is documented for data frames / tibbles, so the matrix is converted
# first. rownames_to_stub = TRUE keeps the variable names as row labels;
# without it the rendered table would not show which row is which variable.
korelace_gt <- gt(as.data.frame(korelace), rownames_to_stub = TRUE)

# colour the cells by the value of the correlation coefficient
# (only the numeric correlation columns, not the row labels)
korelace_gt %>%
  data_color(
    columns = colnames(korelace),
    method = "numeric",
    palette = c("red", "blue"),
    domain = c(-1, 1)
  )

# VISUALISING THE CORRELATION MATRIX ####

# The plots are given the complete correlation matrix. `korelace` has its
# diagonal and upper triangle set to NA (done for the printed table), so
# corrplot() would render those cells as NA placeholders and the "full"
# ggcorrplot() would already be lower-triangular. Which triangle is shown is
# controlled per plot through the `type` / `diag` arguments instead.
korelace_graf <- cor(data[, 4:10])

library(corrplot)
?corrplot
corrplot(korelace_graf)
corrplot(
  korelace_graf,
  type = "lower",
  diag = FALSE
)

library(ggcorrplot)
ggcorrplot(korelace_graf)
ggcorrplot(
  korelace_graf,
  lab = TRUE, # print the correlation coefficients in the cells
  type = "lower",
  colors = c("#00B2EE", "white", "#FF4040"),
  outline.color = "white",
  lab_size = 4,
  legend.title = "Korelacni koeficient",
  title = "Korelace"
  # Optional significance overlay (disabled); to enable, add a comma after
  # `title` and uncomment:
  # p.mat = cor_pmat(data[, 4:10]),
  # sig.level = 0.05,
  # insig = "pch",
  # pch = 4,
  # pch.cex = 14
)

# EXTREME VALUES ####

# Draw the box plot once and keep its return value, so the outliers can be
# listed without drawing the same plot a second time
hdp_boxplot <- boxplot(data$HDP_na_obyvatele, ylim = c(0, 130000))
hdp_boxplot$out

# Copy of the variable in which values above 80000 are replaced by NA
data$HDP_bez_extremu <- replace(
  data$HDP_na_obyvatele,
  data$HDP_na_obyvatele > 80000,
  NA
)

# Select the new column by name instead of by position 11; the position is
# only correct while the imported data has exactly ten columns
sloupce_extremy <- c(names(data)[4:5], "HDP_bez_extremu")

# Without `use`, the NA values introduced above propagate into the result
cor(data[, sloupce_extremy])
?cor # see the `use` argument
cor(data[, sloupce_extremy], use = "pairwise.complete.obs")

# RANK CORRELATION ####

# Select HDP_bez_extremu by name instead of by position 11; the position is
# only correct while the imported data has exactly ten columns
sloupce_poradi <- c(names(data)[4:5], "HDP_bez_extremu")

# The same correlation matrix for each method, rounded to two decimals
for (metoda in c("pearson", "spearman", "kendall")) {
  korelace_metoda <- cor(
    data[, sloupce_poradi],
    method = metoda,
    use = "pairwise.complete.obs"
  )
  print(round(korelace_metoda, 2))
}

# DISTRIBUTION AND TRANSFORMATION ####

# Histogram of the raw variable, then of its natural logarithm
hist(data$HDP_na_obyvatele)
data$HDP_log <- log(data$HDP_na_obyvatele)
hist(data$HDP_log)

# Select HDP_log by name instead of by position 12; the position depends on
# how many columns were imported and added before this point
sloupce_log <- c(names(data)[4:5], "HDP_log")
korelace_log <- cor(
  data[, sloupce_log],
  method = "pearson",
  use = "pairwise.complete.obs"
)
round(korelace_log, 2)

# significance test of the correlation coefficients
library(rstatix)
# Call the rstatix version explicitly: ggcorrplot, attached earlier in this
# script, also exports cor_pmat(), and the steps below rely on the `rowname`
# column of the rstatix result
vyznamnost <- rstatix::cor_pmat(data[, 4:10])
vyznamnost <- as.data.frame(vyznamnost)
rownames(vyznamnost) <- vyznamnost$rowname
# Drop the label column by name (instead of hard-coded positions 2:8) and
# round the p-values to five decimals
vyznamnost <- round(vyznamnost[, names(vyznamnost) != "rowname"], 5)
diag(vyznamnost) <- NA
# Build the mask from the p-value table itself rather than from `korelace`
vyznamnost[upper.tri(vyznamnost)] <- NA

library(PerformanceAnalytics)
chart.Correlation(data[, 4:10])

# HETEROGENEOUS CORRELATION ####

# pearson - two numeric variables
# polychoric - two ordinal variables
# polyserial - numeric x ordinal variable

data_zam <- read.spss("zamestnanci.sav", to.data.frame = TRUE)
str(data_zam)

library(polycor)
?hetcor

# Fit each pair of columns once and read both components from the stored
# result instead of estimating the same correlations twice
hetcor_3_4 <- hetcor(data_zam[, 3:4])
hetcor_3_4$type
hetcor_3_4$correlations

hetcor_2_3 <- hetcor(data_zam[, 2:3])
hetcor_2_3$type
hetcor_2_3$correlations

# PARTIAL CORRELATION ####

# Install ppcor only when it is missing, so re-running the script does not
# reinstall the package every time
if (!requireNamespace("ppcor", quietly = TRUE)) {
  install.packages("ppcor")
}
library(ppcor)

# Compute the partial correlations once; show the estimates and p-values
parcialni <- pcor(data[, 4:10])
parcialni$estimate
parcialni$p.value
