# Analyse hill-climber fitness per search-space id and relate the mean
# fitness to the descriptors stored in the second data file.
#
# Reads "data05/hc_3.dat" and "data05/rnd_3.dat" (space-separated, with a
# header line), draws histograms and scatter plots on the active graphics
# device, prints the test / correlation / model summaries, and returns the
# summary of the final multi-linear model.
#
# Intermediate results are wrapped in print(): inside a function body R does
# not auto-print the value of an expression, so without it the Wilcoxon
# tests, the correlation matrix and the first two model summaries would be
# computed and then silently thrown away.
main <- function() {
  # Read the hill-climber runs; the code below uses columns `id` and
  # `fitness`.
  df.hc <- read.table("data05/hc_3.dat", header = TRUE, sep = " ")

  # Fitness values recorded for one search-space id.
  fitness_of <- function(space_id) {
    df.hc$fitness[df.hc$id == space_id]
  }

  # Mean fitness for each search-space id. vapply() replaces the original
  # grow-by-c() loop: same values, no repeated copying, guaranteed numeric.
  # (doBy::summaryBy(fitness ~ id, data = df.hc) would be an alternative.)
  space_ids <- 0:59
  fitness.mean <- vapply(
    space_ids,
    function(space_id) mean(fitness_of(space_id)),
    numeric(1)
  )

  # Save into a data frame, one row per id.
  df.perf <- data.frame(id = space_ids, fitness.mean = fitness.mean)

  # Fitness samples of the three search spaces compared below.
  fit_8 <- fitness_of(8)
  fit_41 <- fitness_of(41)
  fit_59 <- fitness_of(59)

  # Overlaid histograms of the three samples.
  # Note: ggplot2 gives better-looking plots.
  hist(fit_8, main = "Fitness for ids 8, 41 and 59", xlab = "fitness")
  hist(fit_41, col = "coral3", add = TRUE)
  hist(fit_59, col = "yellow", add = TRUE)

  # Wilcoxon rank-sum test, id 8 vs id 41.
  # Recorded by the original author: p-value = 0.02539, i.e. the medians
  # differ significantly at the 5% level but not at the 1% level.
  print(wilcox.test(fit_8, fit_41))

  # Wilcoxon rank-sum test, id 8 vs id 59.
  # Recorded by the original author: p-value = 7.489e-05, i.e. the medians
  # differ significantly at the 0.1% level, so sub-space 8 is better than
  # sub-space 59.
  print(wilcox.test(fit_8, fit_59))

  # Read the second data file; the models below use its columns `muplus`
  # and `f5`.
  df.rnd <- read.table("data05/rnd_3.dat", header = TRUE, sep = " ")

  # Bind the two data frames column-wise.
  # NOTE(review): cbind() pairs rows by position, so this assumes df.rnd has
  # one row per id in the same 0..59 order -- confirm against the data file.
  df.all <- cbind(df.perf, df.rnd)

  # Matrix of scatter plots.
  # Note: ggpairs() from GGally gives better plots.
  pairs(df.all)

  # One specific plot: fitness.mean against muplus.
  plot(fitness.mean ~ muplus, data = df.all)

  # Pairwise correlation coefficients.
  print(cor(df.all))

  # Linear model on muplus; the regression line is added to the plot above.
  model <- lm(fitness.mean ~ muplus, data = df.all)
  print(summary(model))
  abline(model)

  # Another linear model, on log(f5), with its own plot and regression line.
  plot(fitness.mean ~ log(f5), data = df.all)
  model <- lm(fitness.mean ~ log(f5), data = df.all)
  print(summary(model))
  abline(model)

  # Multi-linear model; its summary is the return value of main(), so it is
  # auto-printed when main() is called at top level.
  model <- lm(fitness.mean ~ log(f5) + muplus, data = df.all)
  summary(model)
}