Show code

script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"

Show code

 $(document).ready(function() {
    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
    // onClick function for all plots (img's)
    $('img:not(.zoomImg)').click(function() {
      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
      $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
    });
    // onClick function for zoomImg
    $('img.zoomImg').click(function() {
      $('.zoomDiv').css({opacity: '0', width: '0%'}); 
    });
  });

Show code

<script src="hideOutput.js"></script>

Show code

$(document).ready(function() {    
    $chunks = $('.fold');    
    $chunks.each(function () {      // add button to source code chunks     
    if ( $(this).hasClass('s') ) {       
        $('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
            $('pre.r', this).children('code').attr('class', 'folded');     
            }      // add button to output chunks     
        if ( $(this).hasClass('o') ) {       
            $('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");       
            $('pre:not(.r)', this).children('code:not(r)').addClass('folded');        // add button to plots       
            $(this).find('img').wrap('<pre class=\"plot\"></pre>');       
            $('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");       
            $('pre.plot', this).children('img').addClass('folded');      
            }   
});    // hide all chunks when document is loaded   
    $('.folded').css('display', 'none')    // function to toggle the visibility   
    $('.showopt').click(function() {     
            var label = $(this).html();     
            if (label.indexOf("Show") >= 0) {       
                $(this).html(label.replace("Show", "Hide"));     
            } else {
              $(this).html(label.replace("Hide", "Show"));     
            }     
    $(this).siblings('code, img').slideToggle('fast', 'swing');   
    }); 
});

Cargamos los datos

Show code

rm(list = ls());gc()

         used (Mb) gc trigger (Mb) max used (Mb)
Ncells 535731 28.7    1208804 64.6   643711 34.4
Vcells 906385  7.0    8388608 64.0  1649728 12.6

Show code

load("data2_lca2_2023_04_26.RData")

Cargamos los paquetes

Show code

knitr::opts_chunk$set(echo = TRUE)

if(!require(poLCA)){install.packages("poLCA")}
if(!require(poLCAParallel)){devtools::install_github("QMUL/poLCAParallel@package")}
if(!require(compareGroups)){install.packages("compareGroups")}
if(!require(parallel)){install.packages("parallel")}
if(!require(Hmisc)){install.packages("Hmisc")}
if(!require(tidyverse)){install.packages("tidyverse")}
try(if(!require(sjPlot)){install.packages("sjPlot")})
if(!require(emmeans)){install.packages("emmeans")}
if(!require(nnet)){install.packages("nnet")}
if(!require(here)){install.packages("here")}
if(!require(doParallel)){install.packages("doParallel")}
if(!require(progress)){install.packages("progress")}
if(!require(caret)){install.packages("caret")}
if(!require(rpart)){install.packages("rpart")}
if(!require(rpart.plot)){install.packages("rpart.plot")}
if(!require(partykit)){install.packages("partykit")}
if(!require(randomForest)){install.packages("randomForest")}
if(!require(ggcorrplot)){install.packages("ggcorrplot")}
if(!require(polycor)){install.packages("polycor")}
if(!require(tableone)){install.packages("tableone")}
if(!require(broom)){install.packages("broom")}
if(!require(plotly)){install.packages("plotly")}
if(!require(rsvg)){install.packages("rsvg")}
if(!require(DiagrammeRsvg)){install.packages("DiagrammeRsvg")}
if(!require(glca)){install.packages("glca")}
#if(!require(poLCA)){githubinstall::gh_install_packages("poLCA", ref = github_pull("14"))}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

Definimos ciertas constantes

Show code

clus_iter= 500 #500
n_thread <- parallel::detectCores()
nrep <- clus_iter # number of different initial values (could be n_thread too)
n_class_max <- 10 # maximum number of classes to investigate
n_bootstrap <- 100 #30 # 50 number of bootstrap samples
print(n_thread)

[1] 8

Análisis de clases latentes

Especificar modelo

Show code

library(DiagrammeR) #⋉
gr_lca4<-
DiagrammeR::grViz([1352 chars quoted with '"'])#, width = 1200, height = 900
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
DPI = 1200
WidthCM = 21
HeightCM = 8

gr_lca4 %>%
  export_svg %>% charToRaw %>% rsvg_pdf("_flowchart_lca_adj._exp2pdf")

gr_lca4 %>% export_svg()%>%charToRaw %>% rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("_flowchart_lca0_adj_exp2.png")

htmlwidgets::saveWidget(gr_lca4, "_flowchart_lca_adj_exp2.html")
webshot::webshot("_flowchart_lca_adj_exp2.html", "_flowchart_lca_adj_exp2.png",vwidth = 1200, vheight = 900,
        zoom = 2)

Figure 1: Gráfico esquemático

Los valores ceros se declararon datos perdidos.

Show code

f_adj<- item(CAUSAL, EDAD_MUJER_REC, PUEBLO_ORIGINARIO_REC, PAIS_ORIGEN_REC,HITO1_EDAD_GEST_SEM_REC, MACROZONA, PREV_TRAMO_REC) ~ outcome

Análisis de clases latentes

Show code

#Biemer, P. P., & Wiesen, C. (2002). Measurement error evaluation of self-reported drug use: a latent class analysis of the US National Household Survey on Drug Abuse. Journal of the Royal Statistical Society: Series A (Statistics in Society), 165(1), 97–119. doi:10.1111/1467-985x.00612  
#lca_entropia(x="ppio", seed= 2125, k= 8, f= f_preds, dat= mydata_preds, nbr_repet= 30, na_rm= T)
#3
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
# f is the selected variables
# dat is the data
# nb_var is the number of selected variables
# k is the number of latent class generated
# nbr_repet is the number of repetition to  
# reach the convergence of EM algorithm
# x es el código para las variables de los modelos
#seed es el numero random para las semillas. ej: 4345.
#Modo de calcular el mejor modelo.
#z_ # 
#2023-01-20
#https://github.com/QMUL/poLCAParallel/blob/master/exec/3_blrt.R
#0h s
seed<-2125
old <- Sys.time()

lca22 <- glca(f_adj, data = mydata_preds3, nclass = 2, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca23 <- glca(f_adj, data = mydata_preds3, nclass = 3, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca24 <- glca(f_adj, data = mydata_preds3, nclass = 4, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca25 <- glca(f_adj, data = mydata_preds3, nclass = 5, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca26 <- glca(f_adj, data = mydata_preds3, nclass = 6, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca27 <- glca(f_adj, data = mydata_preds3, nclass = 7, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca28 <- glca(f_adj, data = mydata_preds3, nclass = 8, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca29 <- glca(f_adj, data = mydata_preds3, nclass = 9, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca210 <- glca(f_adj, data = mydata_preds3, nclass = 10, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)

new_med<-(Sys.time())
paste0("The model took ",round(new_med-old,2)," until every LCA was computed")

[1] "The model took 1.55 until every LCA was computed"

Luego calculamos la razón de verosimilitud mediante remuestreo bootstrap (BLRT) entre los distintos modelos con el que asume una clase menos.

Show code

gof2<-
gofglca(lca22, lca23, lca24, lca25, lca26, lca27, lca28, lca29, lca210, test = "chisq")

bootlrt2<-
gofglca(lca22, lca23, lca24, lca25, lca26, lca27, lca28, lca29, lca210, test = "boot", nboot=n_bootstrap, seed=2125)

Resultados

Hicimos un gráfico de los resultados

Show code

manualcolors <- c('indianred1', 'cornflowerblue', 'gray50', 'darkolivegreen4', 'slateblue2', 
                  'firebrick4', 'goldenrod4')
levels4 <- c("logLik", "Gsq", "AIC", "CAIC", "BIC", "entropy", "Res.Df")
labels4 <- c('Log-Verosimilitud', 'Chi2', 'Criterio de Información\nde Akaike(AIC)'','AIC Corregido'','Criterio de Información\nBayesiano (BIC)')''Entropía'a'Grados de libertad residuales')s')
fig_lca_fit4<- cbind.data.frame(rn=2:10,gof2$gtable) %>%
  data.frame() %>% 
  dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
  tidyr::pivot_longer(cols = -rn,names_to = "indices", values_to = "value", values_drop_na = F) %>%
  dplyr::mutate(indices = factor(indices, levels = levels4, labels = labels4)) %>%
  dplyr::filter(grepl("(AIC|BIC)",indices, ignore.case=T))%>%
  dplyr::mutate(ModelIndex= factor(rn, levels=2:10)) %>% 
  ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
  geom_line(size = 1.5) +
  scale_color_manual(values = manualcolors) +
  #scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  labs(x = "Número de clases"", y=="Valor"", color=="Medida"", linetype=="Medida"))++
  #facet_wrap(.~indices, scales = "free_y", nrow = 4, ncol = 1) +
  theme_bw()

fig_lca_fit4

Show code

ggsave("_fig1_comparison_glca.png",fig_lca_fit4, dpi=600)
#International Journal of Workplace Health Management  (Zhang et al., 2018).

Luego en una tabla

Show code

cbind.data.frame(rn=2:10,gof2$gtable) %>%#
  dplyr::select(rn, everything()) %>% 
    dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
    knitr::kable(format="markdown", caption="Índices de ajuste modelos")

Table 1: Índices de ajuste modelos
rn	logLik	AIC	CAIC	BIC	entropy	Res.Df	Gsq
2	-27703.87	55511.75	55888.22	55836.22	0.9471951	3736	6995.394
3	-27399.61	54957.21	55529.16	55450.16	0.7933576	3709	6386.860
4	-27197.56	54607.12	55374.55	55268.55	0.7487319	3682	5982.768
5	-27045.36	54356.73	55319.63	55186.63	0.7940232	3655	5678.377
6	-26897.50	54115.00	55273.38	55113.38	0.8073065	3628	5382.646
7	-26763.76	53901.52	55255.38	55068.38	0.8196171	3601	5115.172
8	-26672.18	53772.37	55321.69	55107.69	0.7960240	3574	4932.012
9	-26600.22	53682.43	55427.24	55186.24	0.7901783	3547	4788.081
10	-26530.64	53597.28	55537.56	55269.56	0.8587455	3520	4648.923

Show code

best_model2<-
as.numeric(cbind.data.frame(rn=2:10,gof2$gtable) %>% dplyr::summarise(which.min(BIC)+1))

Presentamos el modelo con mejor ajuste

Show code

summary(eval(parse(text = paste0("lca2",best_model2)))) #


Call:
glca(formula = f_adj, data = mydata_preds3, nclass = 7, n.init = 500, 
    decreasing = T, testiter = 500, maxiter = 10000, seed = seed, 
    verbose = FALSE)

Manifest items : CAUSAL EDAD_MUJER_REC PUEBLO_ORIGINARIO_REC PAIS_ORIGEN_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PREV_TRAMO_REC 
Covariates (Level 1) : outcome 

Categories for manifest items :
                        Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
CAUSAL                      2     3     4                  
EDAD_MUJER_REC              1     2     3     4     5     6
PUEBLO_ORIGINARIO_REC       1     2     3                  
PAIS_ORIGEN_REC             1     2     3                  
HITO1_EDAD_GEST_SEM_REC     1     2     3     4     5     6
MACROZONA                   1     2     3     4     5     6
PREV_TRAMO_REC              1     2     3     4     5      

Model : Latent class analysis 

Number of latent classes : 7 
Number of observations : 3789 
Number of parameters : 187 

log-likelihood : -26763.76 
     G-squared : 5115.172 
           AIC : 53901.52 
           BIC : 55068.38 

Marginal prevalences for latent classes :
Class 1 Class 2 Class 3 Class 4 Class 5 Class 6 Class 7 
0.18988 0.04756 0.08593 0.31208 0.16780 0.04381 0.15293 

Class prevalences by group :
    Class 1 Class 2 Class 3 Class 4 Class 5 Class 6 Class 7
ALL 0.18988 0.04756 0.08593 0.31208  0.1678 0.04381 0.15293

Logistic regression coefficients :
            Class 1/7 Class 2/7 Class 3/7 Class 4/7 Class 5/7
(Intercept)    0.9933   -1.0814   -0.0580    1.9176   -0.3817
outcome1      -0.8885   -0.0947   -0.5814   -1.4400    0.5080
            Class 6/7
(Intercept)   -1.5881
outcome1       0.3634
Item-response probabilities :
CAUSAL 
         Y = 1  Y = 2  Y = 3
Class 1 0.9716 0.0284 0.0000
Class 2 0.7457 0.2543 0.0000
Class 3 0.3733 0.6267 0.0000
Class 4 0.1303 0.8697 0.0000
Class 5 0.0748 0.9252 0.0000
Class 6 0.0591 0.0000 0.9409
Class 7 0.0080 0.0000 0.9920
EDAD_MUJER_REC 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5  Y = 6
Class 1 0.0007 0.0112 0.2433 0.4167 0.2923 0.0359
Class 2 0.0681 0.0362 0.2959 0.3122 0.2496 0.0380
Class 3 0.0000 0.0169 0.3047 0.3113 0.3103 0.0569
Class 4 0.0017 0.0464 0.3004 0.3202 0.2646 0.0666
Class 5 0.0034 0.0000 0.1020 0.3156 0.5052 0.0738
Class 6 0.0000 0.1787 0.3661 0.2472 0.1782 0.0298
Class 7 0.0017 0.3683 0.2944 0.2092 0.1071 0.0192
PUEBLO_ORIGINARIO_REC 
         Y = 1  Y = 2  Y = 3
Class 1 0.0844 0.8658 0.0498
Class 2 1.0000 0.0000 0.0000
Class 3 0.1309 0.8321 0.0369
Class 4 0.1286 0.8071 0.0642
Class 5 0.1382 0.8618 0.0000
Class 6 0.1292 0.8336 0.0372
Class 7 0.1519 0.8031 0.0449
PAIS_ORIGEN_REC 
         Y = 1  Y = 2  Y = 3
Class 1 0.0000 0.8834 0.1166
Class 2 0.0848 0.8242 0.0910
Class 3 0.0084 0.0000 0.9916
Class 4 0.0000 0.9692 0.0308
Class 5 0.0000 0.9219 0.0781
Class 6 0.0000 0.0000 1.0000
Class 7 0.0000 0.9917 0.0083
HITO1_EDAD_GEST_SEM_REC 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5  Y = 6
Class 1 0.0749 0.2015 0.2306 0.4870 0.0000 0.0059
Class 2 0.0574 0.0000 0.0905 0.2468 0.2951 0.3101
Class 3 0.0077 0.0077 0.3147 0.3899 0.2207 0.0594
Class 4 0.0013 0.0000 0.2861 0.3710 0.2451 0.0964
Class 5 0.0161 0.0047 0.6078 0.2572 0.0992 0.0149
Class 6 0.0204 0.7946 0.1851 0.0000 0.0000 0.0000
Class 7 0.0088 0.7794 0.2119 0.0000 0.0000 0.0000
MACROZONA 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5  Y = 6
Class 1 0.0000 0.3971 0.1643 0.1693 0.1109 0.1584
Class 2 0.0327 0.1899 0.3338 0.1624 0.0986 0.1826
Class 3 0.0000 0.6294 0.0846 0.0149 0.2518 0.0193
Class 4 0.0009 0.2832 0.1819 0.2321 0.0903 0.2116
Class 5 0.0000 0.6807 0.1169 0.0600 0.0713 0.0712
Class 6 0.0060 0.5588 0.0796 0.0161 0.2983 0.0412
Class 7 0.0052 0.3843 0.1752 0.1447 0.0860 0.2046
PREV_TRAMO_REC 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5
Class 1 0.0000 0.0874 0.5601 0.3524 0.0000
Class 2 0.0243 0.0403 0.6488 0.2295 0.0571
Class 3 0.0066 0.0000 0.6076 0.2792 0.1065
Class 4 0.0013 0.0116 0.6462 0.3410 0.0000
Class 5 0.0000 0.5706 0.1718 0.2509 0.0068
Class 6 0.0299 0.0040 0.5209 0.1640 0.2812
Class 7 0.0017 0.0703 0.7221 0.2006 0.0053

Show code

save.image("data2_lca2_adj4.RData")

Show code

require(tidyverse)
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))

Paso 2.4

Análisis de clases latentes

Especificar modelo

Análisis de clases latentes

Resultados