Análisis de clases latentes exploratoria y comparativa con predictores (glca)
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js" $(document).ready(function() {
$('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
// onClick function for all plots (img's)
$('img:not(.zoomImg)').click(function() {
$('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
$('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
});
// onClick function for zoomImg
$('img.zoomImg').click(function() {
$('.zoomDiv').css({opacity: '0', width: '0%'});
});
});
<script src="hideOutput.js"></script> $(document).ready(function() {
$chunks = $('.fold');
$chunks.each(function () { // add button to source code chunks
if ( $(this).hasClass('s') ) {
$('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
$('pre.r', this).children('code').attr('class', 'folded');
} // add button to output chunks
if ( $(this).hasClass('o') ) {
$('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");
$('pre:not(.r)', this).children('code:not(r)').addClass('folded'); // add button to plots
$(this).find('img').wrap('<pre class=\"plot\"></pre>');
$('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");
$('pre.plot', this).children('img').addClass('folded');
}
}); // hide all chunks when document is loaded
$('.folded').css('display', 'none') // function to toggle the visibility
$('.showopt').click(function() {
var label = $(this).html();
if (label.indexOf("Show") >= 0) {
$(this).html(label.replace("Show", "Hide"));
} else {
$(this).html(label.replace("Hide", "Show"));
}
$(this).siblings('code, img').slideToggle('fast', 'swing');
});
}); Cargamos los datos
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 535731 28.7 1208804 64.6 643711 34.4
Vcells 906385 7.0 8388608 64.0 1649728 12.6
load("data2_lca2_2023_04_26.RData")
Cargamos los paquetes
knitr::opts_chunk$set(echo = TRUE)
if(!require(poLCA)){install.packages("poLCA")}
if(!require(poLCAParallel)){devtools::install_github("QMUL/poLCAParallel@package")}
if(!require(compareGroups)){install.packages("compareGroups")}
if(!require(parallel)){install.packages("parallel")}
if(!require(Hmisc)){install.packages("Hmisc")}
if(!require(tidyverse)){install.packages("tidyverse")}
try(if(!require(sjPlot)){install.packages("sjPlot")})
if(!require(emmeans)){install.packages("emmeans")}
if(!require(nnet)){install.packages("nnet")}
if(!require(here)){install.packages("here")}
if(!require(doParallel)){install.packages("doParallel")}
if(!require(progress)){install.packages("progress")}
if(!require(caret)){install.packages("caret")}
if(!require(rpart)){install.packages("rpart")}
if(!require(rpart.plot)){install.packages("rpart.plot")}
if(!require(partykit)){install.packages("partykit")}
if(!require(randomForest)){install.packages("randomForest")}
if(!require(ggcorrplot)){install.packages("ggcorrplot")}
if(!require(polycor)){install.packages("polycor")}
if(!require(tableone)){install.packages("tableone")}
if(!require(broom)){install.packages("broom")}
if(!require(plotly)){install.packages("plotly")}
if(!require(rsvg)){install.packages("rsvg")}
if(!require(DiagrammeRsvg)){install.packages("DiagrammeRsvg")}
if(!require(glca)){install.packages("glca")}
#if(!require(poLCA)){githubinstall::gh_install_packages("poLCA", ref = github_pull("14"))}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
Definimos ciertas constantes
clus_iter= 500 #500
n_thread <- parallel::detectCores()
nrep <- clus_iter # number of different initial values (could be n_thread too)
n_class_max <- 10 # maximum number of classes to investigate
n_bootstrap <- 100 #30 # 50 number of bootstrap samples
print(n_thread)
[1] 8
library(DiagrammeR) #⋉
gr_lca4<-
DiagrammeR::grViz([1352 chars quoted with '"'])#, width = 1200, height = 900
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
DPI = 1200
WidthCM = 21
HeightCM = 8
gr_lca4 %>%
export_svg %>% charToRaw %>% rsvg_pdf("_flowchart_lca_adj._exp2pdf")
gr_lca4 %>% export_svg()%>%charToRaw %>% rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("_flowchart_lca0_adj_exp2.png")
htmlwidgets::saveWidget(gr_lca4, "_flowchart_lca_adj_exp2.html")
webshot::webshot("_flowchart_lca_adj_exp2.html", "_flowchart_lca_adj_exp2.png",vwidth = 1200, vheight = 900,
zoom = 2)
Figure 1: Gráfico esquemático
Los valores ceros se declararon datos perdidos.
f_adj<- item(CAUSAL, EDAD_MUJER_REC, PUEBLO_ORIGINARIO_REC, PAIS_ORIGEN_REC,HITO1_EDAD_GEST_SEM_REC, MACROZONA, PREV_TRAMO_REC) ~ outcome
#Biemer, P. P., & Wiesen, C. (2002). Measurement error evaluation of self-reported drug use: a latent class analysis of the US National Household Survey on Drug Abuse. Journal of the Royal Statistical Society: Series A (Statistics in Society), 165(1), 97–119. doi:10.1111/1467-985x.00612
#lca_entropia(x="ppio", seed= 2125, k= 8, f= f_preds, dat= mydata_preds, nbr_repet= 30, na_rm= T)
#3
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
# f is the selected variables
# dat is the data
# nb_var is the number of selected variables
# k is the number of latent class generated
# nbr_repet is the number of repetition to
# reach the convergence of EM algorithm
# x es el código para las variables de los modelos
#seed es el numero random para las semillas. ej: 4345.
#Modo de calcular el mejor modelo.
#z_ #
#2023-01-20
#https://github.com/QMUL/poLCAParallel/blob/master/exec/3_blrt.R
#0h s
seed<-2125
old <- Sys.time()
lca22 <- glca(f_adj, data = mydata_preds3, nclass = 2, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca23 <- glca(f_adj, data = mydata_preds3, nclass = 3, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca24 <- glca(f_adj, data = mydata_preds3, nclass = 4, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca25 <- glca(f_adj, data = mydata_preds3, nclass = 5, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca26 <- glca(f_adj, data = mydata_preds3, nclass = 6, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca27 <- glca(f_adj, data = mydata_preds3, nclass = 7, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca28 <- glca(f_adj, data = mydata_preds3, nclass = 8, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca29 <- glca(f_adj, data = mydata_preds3, nclass = 9, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca210 <- glca(f_adj, data = mydata_preds3, nclass = 10, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
new_med<-(Sys.time())
paste0("The model took ",round(new_med-old,2)," until every LCA was computed")
[1] "The model took 1.55 until every LCA was computed"
Luego calculamos la razón de verosimilitud mediante remuestreo bootstrap (BLRT) entre los distintos modelos con el que asume una clase menos.
Hicimos un gráfico de los resultados
manualcolors <- c('indianred1', 'cornflowerblue', 'gray50', 'darkolivegreen4', 'slateblue2',
'firebrick4', 'goldenrod4')
levels4 <- c("logLik", "Gsq", "AIC", "CAIC", "BIC", "entropy", "Res.Df")
labels4 <- c('Log-Verosimilitud', 'Chi2', 'Criterio de Información\nde Akaike(AIC)'','AIC Corregido'','Criterio de Información\nBayesiano (BIC)')''Entropía'a'Grados de libertad residuales')s')
fig_lca_fit4<- cbind.data.frame(rn=2:10,gof2$gtable) %>%
data.frame() %>%
dplyr::mutate_if(is.character, as.numeric) %>% # convert character columns to numeric
tidyr::pivot_longer(cols = -rn,names_to = "indices", values_to = "value", values_drop_na = F) %>%
dplyr::mutate(indices = factor(indices, levels = levels4, labels = labels4)) %>%
dplyr::filter(grepl("(AIC|BIC)",indices, ignore.case=T))%>%
dplyr::mutate(ModelIndex= factor(rn, levels=2:10)) %>%
ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
geom_line(size = 1.5) +
scale_color_manual(values = manualcolors) +
#scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
labs(x = "Número de clases"", y=="Valor"", color=="Medida"", linetype=="Medida"))++
#facet_wrap(.~indices, scales = "free_y", nrow = 4, ncol = 1) +
theme_bw()
fig_lca_fit4
ggsave("_fig1_comparison_glca.png",fig_lca_fit4, dpi=600)
#International Journal of Workplace Health Management (Zhang et al., 2018).
Luego en una tabla
cbind.data.frame(rn=2:10,gof2$gtable) %>%#
dplyr::select(rn, everything()) %>%
dplyr::mutate_if(is.character, as.numeric) %>% # convert character columns to numeric
knitr::kable(format="markdown", caption="Índices de ajuste modelos")
| rn | logLik | AIC | CAIC | BIC | entropy | Res.Df | Gsq |
|---|---|---|---|---|---|---|---|
| 2 | -27703.87 | 55511.75 | 55888.22 | 55836.22 | 0.9471951 | 3736 | 6995.394 |
| 3 | -27399.61 | 54957.21 | 55529.16 | 55450.16 | 0.7933576 | 3709 | 6386.860 |
| 4 | -27197.56 | 54607.12 | 55374.55 | 55268.55 | 0.7487319 | 3682 | 5982.768 |
| 5 | -27045.36 | 54356.73 | 55319.63 | 55186.63 | 0.7940232 | 3655 | 5678.377 |
| 6 | -26897.50 | 54115.00 | 55273.38 | 55113.38 | 0.8073065 | 3628 | 5382.646 |
| 7 | -26763.76 | 53901.52 | 55255.38 | 55068.38 | 0.8196171 | 3601 | 5115.172 |
| 8 | -26672.18 | 53772.37 | 55321.69 | 55107.69 | 0.7960240 | 3574 | 4932.012 |
| 9 | -26600.22 | 53682.43 | 55427.24 | 55186.24 | 0.7901783 | 3547 | 4788.081 |
| 10 | -26530.64 | 53597.28 | 55537.56 | 55269.56 | 0.8587455 | 3520 | 4648.923 |
best_model2<-
as.numeric(cbind.data.frame(rn=2:10,gof2$gtable) %>% dplyr::summarise(which.min(BIC)+1))
Presentamos el modelo con mejor ajuste
Call:
glca(formula = f_adj, data = mydata_preds3, nclass = 7, n.init = 500,
decreasing = T, testiter = 500, maxiter = 10000, seed = seed,
verbose = FALSE)
Manifest items : CAUSAL EDAD_MUJER_REC PUEBLO_ORIGINARIO_REC PAIS_ORIGEN_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PREV_TRAMO_REC
Covariates (Level 1) : outcome
Categories for manifest items :
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
CAUSAL 2 3 4
EDAD_MUJER_REC 1 2 3 4 5 6
PUEBLO_ORIGINARIO_REC 1 2 3
PAIS_ORIGEN_REC 1 2 3
HITO1_EDAD_GEST_SEM_REC 1 2 3 4 5 6
MACROZONA 1 2 3 4 5 6
PREV_TRAMO_REC 1 2 3 4 5
Model : Latent class analysis
Number of latent classes : 7
Number of observations : 3789
Number of parameters : 187
log-likelihood : -26763.76
G-squared : 5115.172
AIC : 53901.52
BIC : 55068.38
Marginal prevalences for latent classes :
Class 1 Class 2 Class 3 Class 4 Class 5 Class 6 Class 7
0.18988 0.04756 0.08593 0.31208 0.16780 0.04381 0.15293
Class prevalences by group :
Class 1 Class 2 Class 3 Class 4 Class 5 Class 6 Class 7
ALL 0.18988 0.04756 0.08593 0.31208 0.1678 0.04381 0.15293
Logistic regression coefficients :
Class 1/7 Class 2/7 Class 3/7 Class 4/7 Class 5/7
(Intercept) 0.9933 -1.0814 -0.0580 1.9176 -0.3817
outcome1 -0.8885 -0.0947 -0.5814 -1.4400 0.5080
Class 6/7
(Intercept) -1.5881
outcome1 0.3634
Item-response probabilities :
CAUSAL
Y = 1 Y = 2 Y = 3
Class 1 0.9716 0.0284 0.0000
Class 2 0.7457 0.2543 0.0000
Class 3 0.3733 0.6267 0.0000
Class 4 0.1303 0.8697 0.0000
Class 5 0.0748 0.9252 0.0000
Class 6 0.0591 0.0000 0.9409
Class 7 0.0080 0.0000 0.9920
EDAD_MUJER_REC
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
Class 1 0.0007 0.0112 0.2433 0.4167 0.2923 0.0359
Class 2 0.0681 0.0362 0.2959 0.3122 0.2496 0.0380
Class 3 0.0000 0.0169 0.3047 0.3113 0.3103 0.0569
Class 4 0.0017 0.0464 0.3004 0.3202 0.2646 0.0666
Class 5 0.0034 0.0000 0.1020 0.3156 0.5052 0.0738
Class 6 0.0000 0.1787 0.3661 0.2472 0.1782 0.0298
Class 7 0.0017 0.3683 0.2944 0.2092 0.1071 0.0192
PUEBLO_ORIGINARIO_REC
Y = 1 Y = 2 Y = 3
Class 1 0.0844 0.8658 0.0498
Class 2 1.0000 0.0000 0.0000
Class 3 0.1309 0.8321 0.0369
Class 4 0.1286 0.8071 0.0642
Class 5 0.1382 0.8618 0.0000
Class 6 0.1292 0.8336 0.0372
Class 7 0.1519 0.8031 0.0449
PAIS_ORIGEN_REC
Y = 1 Y = 2 Y = 3
Class 1 0.0000 0.8834 0.1166
Class 2 0.0848 0.8242 0.0910
Class 3 0.0084 0.0000 0.9916
Class 4 0.0000 0.9692 0.0308
Class 5 0.0000 0.9219 0.0781
Class 6 0.0000 0.0000 1.0000
Class 7 0.0000 0.9917 0.0083
HITO1_EDAD_GEST_SEM_REC
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
Class 1 0.0749 0.2015 0.2306 0.4870 0.0000 0.0059
Class 2 0.0574 0.0000 0.0905 0.2468 0.2951 0.3101
Class 3 0.0077 0.0077 0.3147 0.3899 0.2207 0.0594
Class 4 0.0013 0.0000 0.2861 0.3710 0.2451 0.0964
Class 5 0.0161 0.0047 0.6078 0.2572 0.0992 0.0149
Class 6 0.0204 0.7946 0.1851 0.0000 0.0000 0.0000
Class 7 0.0088 0.7794 0.2119 0.0000 0.0000 0.0000
MACROZONA
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
Class 1 0.0000 0.3971 0.1643 0.1693 0.1109 0.1584
Class 2 0.0327 0.1899 0.3338 0.1624 0.0986 0.1826
Class 3 0.0000 0.6294 0.0846 0.0149 0.2518 0.0193
Class 4 0.0009 0.2832 0.1819 0.2321 0.0903 0.2116
Class 5 0.0000 0.6807 0.1169 0.0600 0.0713 0.0712
Class 6 0.0060 0.5588 0.0796 0.0161 0.2983 0.0412
Class 7 0.0052 0.3843 0.1752 0.1447 0.0860 0.2046
PREV_TRAMO_REC
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5
Class 1 0.0000 0.0874 0.5601 0.3524 0.0000
Class 2 0.0243 0.0403 0.6488 0.2295 0.0571
Class 3 0.0066 0.0000 0.6076 0.2792 0.1065
Class 4 0.0013 0.0116 0.6462 0.3410 0.0000
Class 5 0.0000 0.5706 0.1718 0.2509 0.0068
Class 6 0.0299 0.0040 0.5209 0.1640 0.2812
Class 7 0.0017 0.0703 0.7221 0.2006 0.0053
save.image("data2_lca2_adj4.RData")
require(tidyverse)
sesion_info <- devtools::session_info()
dplyr::select(
tibble::as_tibble(sesion_info$packages),
c(package, loadedversion, source)
) %>%
DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
'', htmltools::em('Packages')),
options=list(
initComplete = htmlwidgets::JS(
"function(settings, json) {",
"$(this.api().tables().body()).css({
'font-family': 'Helvetica Neue',
'font-size': '50%',
'code-inline-font-size': '15%',
'white-space': 'nowrap',
'line-height': '0.75em',
'min-height': '0.5em'
});",#;
"}")))