Paso 2.16

Análisis de clases latentes exploratoria y comparativa con predictores (glca), sin pueblo originario y sin año y recategorización de edad mujer y semana gestacional hito 1

Andrés González Santa Cruz
May 14, 2023
Show code
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"
Show code
 $(document).ready(function() {
    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
    // onClick function for all plots (img's)
    $('img:not(.zoomImg)').click(function() {
      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
      $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
    });
    // onClick function for zoomImg
    $('img.zoomImg').click(function() {
      $('.zoomDiv').css({opacity: '0', width: '0%'}); 
    });
  });
  
Show code
<script src="hideOutput.js"></script> 
Show code
$(document).ready(function() {    
    $chunks = $('.fold');    
    $chunks.each(function () {      // add button to source code chunks     
    if ( $(this).hasClass('s') ) {       
        $('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
            $('pre.r', this).children('code').attr('class', 'folded');     
            }      // add button to output chunks     
        if ( $(this).hasClass('o') ) {       
            $('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");       
            $('pre:not(.r)', this).children('code:not(r)').addClass('folded');        // add button to plots       
            $(this).find('img').wrap('<pre class=\"plot\"></pre>');       
            $('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");       
            $('pre.plot', this).children('img').addClass('folded');      
            }   
});    // hide all chunks when document is loaded   
    $('.folded').css('display', 'none')    // function to toggle the visibility   
    $('.showopt').click(function() {     
            var label = $(this).html();     
            if (label.indexOf("Show") >= 0) {       
                $(this).html(label.replace("Show", "Hide"));     
            } else {
              $(this).html(label.replace("Hide", "Show"));     
            }     
    $(this).siblings('code, img').slideToggle('fast', 'swing');   
    }); 
}); 

Cargamos los datos

Show code
rm(list = ls());gc()
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  652814 34.9    1332068 71.2   879926 47.0
Vcells 1170238  9.0    8388608 64.0  2075161 15.9
Show code
load("data2_lca215_sin_po_alt_ano_2023_05_14.RData")

Cargamos los paquetes

Show code
knitr::opts_chunk$set(echo = TRUE)

if(!require(poLCA)){install.packages("poLCA")}
if(!require(poLCAParallel)){devtools::install_github("QMUL/poLCAParallel@package")}
if(!require(compareGroups)){install.packages("compareGroups")}
if(!require(parallel)){install.packages("parallel")}
if(!require(Hmisc)){install.packages("Hmisc")}
if(!require(tidyverse)){install.packages("tidyverse")}
try(if(!require(sjPlot)){install.packages("sjPlot")})
if(!require(emmeans)){install.packages("emmeans")}
if(!require(nnet)){install.packages("nnet")}
if(!require(here)){install.packages("here")}
if(!require(doParallel)){install.packages("doParallel")}
if(!require(progress)){install.packages("progress")}
if(!require(caret)){install.packages("caret")}
if(!require(rpart)){install.packages("rpart")}
if(!require(rpart.plot)){install.packages("rpart.plot")}
if(!require(partykit)){install.packages("partykit")}
if(!require(randomForest)){install.packages("randomForest")}
if(!require(ggcorrplot)){install.packages("ggcorrplot")}
if(!require(polycor)){install.packages("polycor")}
if(!require(tableone)){install.packages("tableone")}
if(!require(broom)){install.packages("broom")}
if(!require(plotly)){install.packages("plotly")}
if(!require(rsvg)){install.packages("rsvg")}
if(!require(DiagrammeRsvg)){install.packages("DiagrammeRsvg")}
if(!require(glca)){install.packages("glca")}
#if(!require(poLCA)){githubinstall::gh_install_packages("poLCA", ref = github_pull("14"))}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

Definimos ciertas constantes

Show code
clus_iter= 500 #500
n_thread <- parallel::detectCores()
nrep <- clus_iter # number of different initial values (could be n_thread too)
n_class_max <- 10 # maximum number of classes to investigate
n_bootstrap <- 100 #30 # 50 number of bootstrap samples
print(n_thread)
[1] 8

Análisis de clases latentes

Especificar modelo

Los valores ceros se declararon datos perdidos.

Show code
f_adj<- item(CAUSAL, EDAD_MUJER_REC, PAIS_ORIGEN_REC, HITO1_EDAD_GEST_SEM_REC, MACROZONA, PREV_TRAMO_REC) ~ outcome

Análisis de clases latentes

Show code
#Biemer, P. P., & Wiesen, C. (2002). Measurement error evaluation of self-reported drug use: a latent class analysis of the US National Household Survey on Drug Abuse. Journal of the Royal Statistical Society: Series A (Statistics in Society), 165(1), 97–119. doi:10.1111/1467-985x.00612  
#lca_entropia(x="ppio", seed= 2125, k= 8, f= f_preds, dat= mydata_preds, nbr_repet= 30, na_rm= T)
#3
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
# f is the selected variables
# dat is the data
# nb_var is the number of selected variables
# k is the number of latent class generated
# nbr_repet is the number of repetition to  
# reach the convergence of EM algorithm
# x es el código para las variables de los modelos
#seed es el numero random para las semillas. ej: 4345.
#Modo de calcular el mejor modelo.
#z_ # 
#2023-01-20
#https://github.com/QMUL/poLCAParallel/blob/master/exec/3_blrt.R
#0h s
seed<-2125
old <- Sys.time()

lca22 <- glca(f_adj, data = mydata_preds3, nclass = 2, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca23 <- glca(f_adj, data = mydata_preds3, nclass = 3, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca24 <- glca(f_adj, data = mydata_preds3, nclass = 4, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca25 <- glca(f_adj, data = mydata_preds3, nclass = 5, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca26 <- glca(f_adj, data = mydata_preds3, nclass = 6, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca27 <- glca(f_adj, data = mydata_preds3, nclass = 7, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca28 <- glca(f_adj, data = mydata_preds3, nclass = 8, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca29 <- glca(f_adj, data = mydata_preds3, nclass = 9, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca210 <- glca(f_adj, data = mydata_preds3, nclass = 10, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)

new_med<-(Sys.time())
paste0("The model took ",round(new_med-old,2)," until every LCA was computed")
[1] "The model took 1.55 until every LCA was computed"

Luego calculamos la razón de verosimilitud mediante remuestreo bootstrap (BLRT) entre los distintos modelos con el que asume una clase menos.

Show code
gof2<-
gofglca(lca22, lca23, lca24, lca25, lca26, lca27, lca28, lca29, lca210, test = "chisq")

bootlrt2<-
gofglca(lca22, lca23, lca24, lca25, lca26, lca27, lca28, lca29, lca210, test = "boot", nboot=n_bootstrap, seed=2125)

Resultados

Hicimos un gráfico de los resultados

Show code
manualcolors <- c('indianred1', 'cornflowerblue', 'gray50', 'darkolivegreen4', 'slateblue2', 
                  'firebrick4', 'goldenrod4')
levels4 <- c("logLik", "Gsq", "AIC", "CAIC", "BIC", "entropy", "Res.Df")
labels4 <- c('Log-Verosimilitud', 'Chi2', 'Criterio de Información\nde Akaike(AIC)'','AIC Corregido'','Criterio de Información\nBayesiano (BIC)')''Entropía'­a'Grados de libertad residuales')s')
fig_lca_fit4<- cbind.data.frame(rn=2:10,gof2$gtable) %>%
  data.frame() %>% 
  dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
  tidyr::pivot_longer(cols = -rn,names_to = "indices", values_to = "value", values_drop_na = F) %>%
  dplyr::mutate(indices = factor(indices, levels = levels4, labels = labels4)) %>%
  dplyr::filter(grepl("(AIC|BIC)",indices, ignore.case=T))%>%
  dplyr::mutate(ModelIndex= factor(rn, levels=2:10)) %>% 
  ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
  geom_line(size = 1.5) +
  scale_color_manual(values = manualcolors) +
  #scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  labs(x = "Número de clases"", y=="Valor"", color=="Medida"", linetype=="Medida"))++
  #facet_wrap(.~indices, scales = "free_y", nrow = 4, ncol = 1) +
  theme_bw()

fig_lca_fit4
Show code
ggsave("_fig1_comparison_glca_sin_po_ano.png",fig_lca_fit4, dpi=600)
#International Journal of Workplace Health Management  (Zhang et al., 2018).

Luego en una tabla

Show code
cbind.data.frame(rn=2:10,gof2$gtable) %>%#
  dplyr::select(rn, everything()) %>% 
    dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
    knitr::kable(format="markdown", caption="Índices de ajuste modelos")
Table 1: Índices de ajuste modelos
rn logLik AIC CAIC BIC entropy Res.Df Gsq
2 -22706.84 45497.68 45801.76 45759.76 0.9903695 3746 3536.090
3 -22464.96 45057.91 45521.26 45457.26 0.8901938 3724 3052.317
4 -22282.17 44736.34 45358.97 45272.97 0.9097230 3702 2686.744
5 -22149.63 44515.27 45297.17 45189.17 0.9225448 3680 2421.673
6 -22062.06 44384.11 45325.29 45195.29 0.8627183 3658 2246.516
7 -22003.44 44310.89 45411.35 45259.35 0.8536486 3636 2129.293
8 -21950.00 44248.00 45507.74 45333.74 0.7837666 3614 2022.410
9 -21900.74 44193.49 45612.50 45416.50 0.7559552 3592 1923.894
10 -21868.96 44173.91 45752.20 45534.20 0.7437696 3570 1860.319
Show code
best_model2<-
as.numeric(cbind.data.frame(rn=2:10,gof2$gtable) %>% dplyr::summarise(which.min(BIC)+1))

Presentamos el modelo con mejor ajuste

Show code
summary(eval(parse(text = paste0("lca2",best_model2)))) #

Call:
glca(formula = f_adj, data = mydata_preds3, nclass = 5, n.init = 500, 
    decreasing = T, testiter = 500, maxiter = 10000, seed = seed, 
    verbose = FALSE)

Manifest items : CAUSAL EDAD_MUJER_REC PAIS_ORIGEN_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PREV_TRAMO_REC 
Covariates (Level 1) : outcome 

Categories for manifest items :
                        Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
CAUSAL                      2     3     4                  
EDAD_MUJER_REC              1     2     3     4     5      
PAIS_ORIGEN_REC             1     2     3                  
HITO1_EDAD_GEST_SEM_REC     1     2     3     4            
MACROZONA                   1     2     3     4     5     6
PREV_TRAMO_REC              1     2     3     4     5      

Model : Latent class analysis 

Number of latent classes : 5 
Number of observations : 3789 
Number of parameters : 108 

log-likelihood : -22149.63 
     G-squared : 2421.673 
           AIC : 44515.27 
           BIC : 45189.17 

Marginal prevalences for latent classes :
Class 1 Class 2 Class 3 Class 4 Class 5 
0.12504 0.54687 0.13337 0.04254 0.15218 

Class prevalences by group :
    Class 1 Class 2 Class 3 Class 4 Class 5
ALL 0.12504 0.54687 0.13337 0.04254 0.15218

Logistic regression coefficients :
            Class 1/5 Class 2/5 Class 3/5 Class 4/5
(Intercept)    0.4116    2.2509   -0.6620   -1.5768
outcome1      -0.6863   -1.1317    0.5664    0.3254
Item-response probabilities :
CAUSAL 
         Y = 1  Y = 2  Y = 3
Class 1 0.4626 0.5374 0.0000
Class 2 0.4068 0.5932 0.0000
Class 3 0.2021 0.7979 0.0000
Class 4 0.0245 0.0000 0.9755
Class 5 0.0049 0.0000 0.9951
EDAD_MUJER_REC 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5
Class 1 0.0000 0.0089 0.1746 0.5022 0.3142
Class 2 0.0082 0.0181 0.2114 0.4563 0.3060
Class 3 0.0000 0.0000 0.0392 0.4345 0.5262
Class 4 0.0000 0.1647 0.2332 0.4514 0.1507
Class 5 0.0017 0.3483 0.2460 0.2951 0.1089
PAIS_ORIGEN_REC 
         Y = 1  Y = 2  Y = 3
Class 1 0.0121 0.0001 0.9878
Class 2 0.0059 0.9941 0.0000
Class 3 0.0000 0.9069 0.0931
Class 4 0.0000 0.0007 0.9993
Class 5 0.0000 0.9932 0.0068
HITO1_EDAD_GEST_SEM_REC 
         Y = 1  Y = 2  Y = 3  Y = 4
Class 1 0.0204 0.1142 0.7360 0.1294
Class 2 0.0290 0.1733 0.6603 0.1374
Class 3 0.0181 0.3704 0.5720 0.0396
Class 4 0.0192 0.9808 0.0000 0.0000
Class 5 0.0088 0.9877 0.0035 0.0000
MACROZONA 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5  Y = 6
Class 1 0.0000 0.5660 0.1060 0.0623 0.2147 0.0510
Class 2 0.0038 0.3205 0.1918 0.1998 0.0949 0.1893
Class 3 0.0000 0.7205 0.0937 0.0498 0.0700 0.0660
Class 4 0.0062 0.5563 0.0870 0.0165 0.2956 0.0384
Class 5 0.0038 0.3850 0.1751 0.1452 0.0862 0.2047
PREV_TRAMO_REC 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5
Class 1 0.0098 0.0007 0.6260 0.2819 0.0817
Class 2 0.0020 0.0305 0.6254 0.3384 0.0037
Class 3 0.0000 0.7586 0.0043 0.2281 0.0091
Class 4 0.0256 0.0061 0.5214 0.1672 0.2796
Class 5 0.0018 0.0696 0.7237 0.1997 0.0053
Show code
save.image("data2_lca2_adj216_alt_sin_po_ano.RData")
Show code
require(tidyverse)
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))