PISA 2012 - multi-dimensional Gaussian merging

Agnieszka Sitko

2017-06-25

Libraries

library(factorMerger)
library(ggplot2)
library(dplyr)
library(reshape2)

Load data

# Load the pisa2012 dataset shipped with factorMerger into the workspace.
data("pisa2012", package = "factorMerger")

Explore

# Bar chart of the number of rows per country (CNT); the x-axis labels are
# rotated by 90 degrees so that the country names do not overlap.
ggplot(pisa2012, aes(x = CNT)) +
    geom_bar() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Reshape to long format with reshape2::melt, dropping missing values. The
# plotting code below relies on the resulting `variable` and `value` columns
# together with CNT.
meltedPisa <- melt(pisa2012, na.rm = TRUE)

# One box plot per country, faceted by `variable`; countries are ordered on
# the x-axis by their median `value`.
pisaResultsBySubject <- ggplot(
    meltedPisa,
    aes(x = reorder(CNT, value, FUN = median), y = value)
) +
    geom_boxplot() +
    facet_wrap(~variable) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab("Country")

pisaResultsBySubject

# Overlay a red horizontal line in each facet at the mean of `value`
# computed over all rows of that `variable`.
pisaResultsBySubject +
    geom_hline(
        data = summarise(group_by(meltedPisa, variable), mean = mean(value)),
        mapping = aes(yintercept = mean, group = variable),
        colour = "red"
    )

TODO: Find the countries that are significantly better than, significantly worse than, and not significantly different from the global averages, i.e. cluster the countries into these three groups.

Run MANOVA

# One-way MANOVA: the three score columns (PV1MATH, PV1READ, PV1SCIE) are
# modelled jointly with country (CNT) as the only factor.
summary(
    manova(cbind(PV1MATH, PV1READ, PV1SCIE) ~ CNT, data = pisa2012)
)
#>               Df  Pillai approx F num Df den Df    Pr(>F)    
#> CNT           42 0.32207   776.81    126 813837 < 2.2e-16 ***
#> Residuals 271279                                             
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The test rejects the hypothesis of equal country means (p < 2.2e-16), so there are differences among the countries included in PISA. Let’s find them!

Factor Merger

# Draw a random subsample of 500 rows and run Factor Merger on it.
#
# The seed is fixed so that the subsample, and therefore the merging path,
# is the same on every run. NOTE: any previously rendered output for this
# chunk came from an unseeded draw and will not match a fresh run exactly.
set.seed(2017)

# seq_len() is used instead of 1:nrow(), which would yield c(1, 0) for an
# empty data frame.
pisaIdxSubset <- sample(seq_len(nrow(pisa2012)), size = 500)

# Columns 1:3 are used as the multivariate response -- presumably the same
# PV1MATH / PV1READ / PV1SCIE columns used in the MANOVA; confirm against the
# dataset. Re-wrapping CNT in factor() drops the country levels that do not
# occur in the subsample.
pisaFM <- mergeFactors(pisa2012[pisaIdxSubset, 1:3],
                       factor(pisa2012$CNT[pisaIdxSubset]))

pisaFM
#> Family: gaussian Factor Merger.
#> Factor levels were recoded as below:
#> 
#> recoded   original                 
#> --------  -------------------------
#> (UnAE)    United Arab Emirates     
#> (Astrl)   Australia                
#> (Austr)   Austria                  
#> (Blgm)    Belgium                  
#> (Blgr)    Bulgaria                 
#> (Brzl)    Brazil                   
#> (Cand)    Canada                   
#> (Chil)    Chile                    
#> (Clmb)    Colombia                 
#> (CzcR)    Czech Republic           
#> (Grmn)    Germany                  
#> (Dnmr)    Denmark                  
#> (Span)    Spain                    
#> (Fnln)    Finland                  
#> (Frnc)    France                   
#> (UntK)    United Kingdom           
#> (Hngr)    Hungary                  
#> (Irln)    Ireland                  
#> (Isrl)    Israel                   
#> (Itly)    Italy                    
#> (Japn)    Japan                    
#> (Kore)    Korea                    
#> (Mlys)    Malaysia                 
#> (Nthr)    Netherlands              
#> (Nrwy)    Norway                   
#> (Plnd)    Poland                   
#> (Ch-S)    China-Shanghai           
#> (RssF)    Russian Federation       
#> (Sngp)    Singapore                
#> (Serb)    Serbia                   
#> (SlvR)    Slovak Republic          
#> (Swdn)    Sweden                   
#> (ChnT)    Chinese Taipei           
#> (Trky)    Turkey                   
#> (USoA)    United States of America 
#> 
#> Factor levels were merged in the following order:
#> 
#>       groupA                                                                                        groupB                                                                                                                           model        GIC   pvalVsFull   pvalVsPrevious
#> ----  --------------------------------------------------------------------------------------------  --------------------------------------------------------------------------------------------------------------------------  ----------  ---------  -----------  ---------------
#> 1                                                                                                                                                                                                                                -8045.028   16160.06       1.0000           1.0000
#> 11    (Nthr)                                                                                        (Chil)                                                                                                                       -8045.051   16158.10       0.9977           0.9977
#> 12    (Swdn)                                                                                        (Fnln)                                                                                                                       -8045.105   16156.21       0.9999           0.9919
#> 13    (Swdn)(Fnln)                                                                                  (Blgr)                                                                                                                       -8045.196   16154.39       1.0000           0.9822
#> 14    (Blgm)                                                                                        (Ch-S)                                                                                                                       -8045.298   16152.60       1.0000           0.9790
#> 15    (Mlys)                                                                                        (Sngp)                                                                                                                       -8045.419   16150.84       1.0000           0.9734
#> 16    (USoA)                                                                                        (Frnc)                                                                                                                       -8045.544   16149.09       1.0000           0.9719
#> 17    (Hngr)                                                                                        (Nthr)(Chil)                                                                                                                 -8045.681   16147.36       1.0000           0.9679
#> 18    (ChnT)                                                                                        (SlvR)                                                                                                                       -8045.845   16145.69       1.0000           0.9584
#> 19    (Irln)                                                                                        (Kore)                                                                                                                       -8046.032   16144.06       1.0000           0.9498
#> 110   (UntK)                                                                                        (USoA)(Frnc)                                                                                                                 -8046.250   16142.50       1.0000           0.9375
#> 111   (Japn)                                                                                        (CzcR)                                                                                                                       -8046.536   16141.07       1.0000           0.9097
#> 112   (Dnmr)                                                                                        (Brzl)                                                                                                                       -8046.834   16139.67       1.0000           0.9043
#> 113   (RssF)                                                                                        (Itly)                                                                                                                       -8047.239   16138.48       1.0000           0.8566
#> 114   (Irln)(Kore)                                                                                  (Blgm)(Ch-S)                                                                                                                 -8047.650   16137.30       1.0000           0.8536
#> 115   (Astrl)                                                                                       (Plnd)                                                                                                                       -8048.064   16136.13       1.0000           0.8514
#> 116   (Austr)                                                                                       (ChnT)(SlvR)                                                                                                                 -8048.508   16135.02       1.0000           0.8374
#> 117   (Mlys)(Sngp)                                                                                  (Span)                                                                                                                       -8048.972   16133.94       1.0000           0.8283
#> 118   (Isrl)                                                                                        (Irln)(Kore)(Blgm)(Ch-S)                                                                                                     -8049.469   16132.94       1.0000           0.8118
#> 119   (Nrwy)                                                                                        (Cand)                                                                                                                       -8050.310   16132.62       1.0000           0.6550
#> 120   (Trky)                                                                                        (UntK)(USoA)(Frnc)                                                                                                           -8051.336   16132.67       1.0000           0.5767
#> 121   (Astrl)(Plnd)                                                                                 (Grmn)                                                                                                                       -8052.566   16133.13       1.0000           0.4975
#> 122   (UnAE)                                                                                        (Swdn)(Fnln)(Blgr)                                                                                                           -8053.808   16133.62       1.0000           0.4924
#> 123   (Hngr)(Nthr)(Chil)                                                                            (RssF)(Itly)                                                                                                                 -8055.242   16134.48       1.0000           0.4258
#> 124   (Dnmr)(Brzl)                                                                                  (Clmb)                                                                                                                       -8056.878   16135.76       1.0000           0.3641
#> 125   (Dnmr)(Brzl)(Clmb)                                                                            (Serb)                                                                                                                       -8058.819   16137.64       1.0000           0.2858
#> 126   (Austr)(ChnT)(SlvR)                                                                           (Hngr)(Nthr)(Chil)(RssF)(Itly)                                                                                               -8060.877   16139.75       1.0000           0.2590
#> 127   (Austr)(ChnT)(SlvR)(Hngr)(Nthr)(Chil)(RssF)(Itly)                                             (Mlys)(Sngp)(Span)                                                                                                           -8063.868   16143.74       1.0000           0.1189
#> 128   (Nrwy)(Cand)                                                                                  (Isrl)(Irln)(Kore)(Blgm)(Ch-S)                                                                                               -8066.977   16147.95       1.0000           0.1069
#> 129   (UnAE)(Swdn)(Fnln)(Blgr)                                                                      (Trky)(UntK)(USoA)(Frnc)                                                                                                     -8070.437   16152.87       0.9997           0.0785
#> 130   (Astrl)(Plnd)(Grmn)                                                                           (Japn)(CzcR)                                                                                                                 -8075.792   16161.58       0.9954           0.0144
#> 131   (UnAE)(Swdn)(Fnln)(Blgr)(Trky)(UntK)(USoA)(Frnc)                                              (Astrl)(Plnd)(Grmn)(Japn)(CzcR)                                                                                              -8088.486   16184.97       0.7627           0.0000
#> 132   (Dnmr)(Brzl)(Clmb)(Serb)                                                                      (Austr)(ChnT)(SlvR)(Hngr)(Nthr)(Chil)(RssF)(Itly)(Mlys)(Sngp)(Span)                                                          -8105.111   16216.22       0.1084           0.0000
#> 133   (UnAE)(Swdn)(Fnln)(Blgr)(Trky)(UntK)(USoA)(Frnc)(Astrl)(Plnd)(Grmn)(Japn)(CzcR)               (Nrwy)(Cand)(Isrl)(Irln)(Kore)(Blgm)(Ch-S)                                                                                   -8122.216   16248.43       0.0013           0.0000
#> 134   (Dnmr)(Brzl)(Clmb)(Serb)(Austr)(ChnT)(SlvR)(Hngr)(Nthr)(Chil)(RssF)(Itly)(Mlys)(Sngp)(Span)   (UnAE)(Swdn)(Fnln)(Blgr)(Trky)(UntK)(USoA)(Frnc)(Astrl)(Plnd)(Grmn)(Japn)(CzcR)(Nrwy)(Cand)(Isrl)(Irln)(Kore)(Blgm)(Ch-S)    -8171.413   16344.83       0.0000           0.0000
# Plot the merging result for the 500-row subsample, requesting the "profile"
# variant of the response panel.
plot(pisaFM, responsePanel = "profile")

On a big dataset it is faster to use the ‘hclust’ method.

# Factor Merger on the full dataset with the "hclust" method and
# successive = TRUE.
# NOTE(review): the object name says "Math", but all three response columns
# (1:3) are passed -- confirm which was intended.
pisaFMHClustMath <- mergeFactors(
    pisa2012[, 1:3],
    factor(pisa2012$CNT),
    successive = TRUE,
    method = "hclust"
)

plot(pisaFMHClustMath)

# Same full-data "hclust" run as before, but with successive = FALSE.
pisaFMHClust <- mergeFactors(
    pisa2012[, 1:3],
    factor(pisa2012$CNT),
    successive = FALSE,
    method = "hclust"
)

plot(pisaFMHClust)

Let’s now try the same analysis using European countries only.


# Keep only the rows whose country (CNT) is one of the selected European
# countries. dplyr::filter is namespaced so that stats::filter cannot be
# picked up by mistake.
pisaEuropean <- pisa2012 %>%
    dplyr::filter(
        CNT %in% c(
            "Austria", "Belgium", "Bulgaria", "Czech Republic",
            "Germany", "Denmark", "Spain", "Estonia",
            "Finland", "France", "Hungary", "Ireland",
            "Italy", "Netherlands", "Norway", "Poland",
            "Portugal", "Russian Federation", "Slovak Republic", "Slovenia"
        )
    )


# Factor Merger ("hclust", successive = TRUE) restricted to the European
# subset. Re-wrapping CNT in factor() drops the levels of the countries that
# were filtered out.
pisaFMHClustEurope <- mergeFactors(
    pisaEuropean[, 1:3],
    factor(pisaEuropean$CNT),
    successive = TRUE,
    method = "hclust"
)

plot(pisaFMHClustEurope)