Análisis de clases latentes exploratoria y comparativa sin predictores (glca)
// NOTE(review): this block is JavaScript extracted from an R Markdown HTML
// header. The first line is a mangled <script src=...> tag fused with the
// start of the jQuery ready handler — verify against the original document.
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js" $(document).ready(function() {
// Add a hidden overlay div that will hold a zoomed copy of a clicked image.
$('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
// onClick function for all plots (img's)
$('img:not(.zoomImg)').click(function() {
// Copy the clicked image's src into the overlay image, then style the overlay
// as a centered, fixed-position lightbox above the page content.
$('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
$('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
});
// onClick function for zoomImg
// Clicking the zoomed image collapses the overlay again.
$('img.zoomImg').click(function() {
$('.zoomDiv').css({opacity: '0', width: '0%'});
});
});
// NOTE(review): mangled extraction — a <script> include tag is fused with the
// start of this handler, and the final line carries trailing document prose
// ("Cargamos los datos"). Verify against the original R Markdown source.
<script src="hideOutput.js"></script> $(document).ready(function() {
// Collect all knitr chunks marked with class "fold".
$chunks = $('.fold');
$chunks.each(function () { // add button to source code chunks
if ( $(this).hasClass('s') ) {
// "fold s": hide R source, prepend a "Show Source" toggle button.
$('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
$('pre.r', this).children('code').attr('class', 'folded');
} // add button to output chunks
if ( $(this).hasClass('o') ) {
// "fold o": hide text output and plots, each behind its own toggle button.
$('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");
$('pre:not(.r)', this).children('code:not(r)').addClass('folded'); // add button to plots
$(this).find('img').wrap('<pre class=\"plot\"></pre>');
$('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");
$('pre.plot', this).children('img').addClass('folded');
}
}); // hide all chunks when document is loaded
$('.folded').css('display', 'none') // function to toggle the visibility
// The button label flips between Show/Hide and the sibling content slides.
$('.showopt').click(function() {
var label = $(this).html();
if (label.indexOf("Show") >= 0) {
$(this).html(label.replace("Show", "Hide"));
} else {
$(this).html(label.replace("Hide", "Show"));
}
$(this).siblings('code, img').slideToggle('fast', 'swing');
});
}); Cargamos los datos
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 536001 28.7 1209575 64.6 643711 34.4
Vcells 909037 7.0 8388608 64.0 1649632 12.6
load("data2.RData")
Cargamos los paquetes
knitr::opts_chunk$set(echo = TRUE)
# Install-and-load loop for all required packages.
# BUG FIX: the original `if(!require(pkg)){install.packages(pkg)}` pattern
# installed a missing package but never attached it in the same session;
# library() is now called after a successful install.
# Packages are processed in the original order so search-path masking of
# conflicting names is unchanged.
pkgs <- c(
  "poLCA", "poLCAParallel", "compareGroups", "parallel", "Hmisc", "tidyverse",
  "sjPlot", "emmeans", "nnet", "here", "doParallel", "progress", "caret",
  "rpart", "rpart.plot", "partykit", "randomForest", "ggcorrplot", "polycor",
  "tableone", "DiagrammeR", "rsvg", "DiagrammeRsvg", "webshot", "htmlwidgets",
  "glca"
)
for (pkg in pkgs) {
  if (!require(pkg, character.only = TRUE)) {
    # try() keeps a failed install non-fatal (the original wrapped sjPlot in
    # try() for the same reason).
    try({
      if (pkg == "poLCAParallel") {
        # Not on CRAN: install from GitHub (requires devtools).
        devtools::install_github("QMUL/poLCAParallel@package")
      } else {
        install.packages(pkg)
      }
      library(pkg, character.only = TRUE)
    })
  }
}
#if(!require(poLCA)){githubinstall::gh_install_packages("poLCA", ref = github_pull("14"))}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
lca_dir<-here::here()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Evaluate an expression, returning NA instead of propagating an error.
# x: any expression. Returns its value, or NA if evaluation failed.
tryNA <- function(x) {
  result <- try(x)
  if (inherits(result, "try-error")) {
    return(NA)
  }
  result
}
#https://rdrr.io/github/hyunsooseol/snowRMM/src/R/lca.b.R
#https://github.com/dlinzer/poLCA/issues/7
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#' Bivariate residuals for latent class models
#'
#' Calculate the "bivariate residuals" (BVRs) between pairs of variables
#' in a latent class model.
#'
#' This function compares the model-implied (expected) counts in the crosstables
#' of all pairs of observed dependent variables to the observed counts. For each
#' pair, it calculates a "chi-square" statistic,
#'
#' \deqn{\text{BVR} = \sum_{j, j'} \frac{(n_{jj'} - e_{jj'})^2}{e_{jj'}}},
#'
#' where \eqn{n_{jj'}} are the observed counts for categories \eqn{j} and \eqn{j'}
#' of the variables being crosstabulated, and \eqn{e_{jj'}} are
#' the expected counts under the latent class model.
#'
#' Note that the BVR does not follow an asymptotic chi-square distribution and
#' for accurate p-values, parametric bootstrapping is necessary (Oberski et al. 2013).
#'
#' @param fit A poLCA fit object
#' @param tol Optional: tolerance for small expected counts
#' @param rescale_to_df Optional: whether to divide the pairwise "chi-square" values by
#' the degrees of freedom of the local crosstable. Default is TRUE.
#' @return The table of bivariate residuals
#' @author Daniel Oberski (daniel.oberski@gmail.com)
#' @seealso \code{\link{poLCA}} for fitting the latent class model.
#' @references
#' Oberski, DL, Van Kollenburg, GH and Vermunt, JK (2013).
#' A Monte Carlo evaluation of three methods to detect local dependence in binary data latent class models.
#' Advances in Data Analysis and Classification 7 (3), 267-279.
#' @examples
#' data(values)
#' f <- cbind(A, B, C, D) ~ 1
#' M0 <- poLCA(f,values, nclass=1, verbose = FALSE)
#' bvr(M0) # 12.4, 5.7, 8.3, 15.6, ...
bvr <- function(fit, tol = 1e-3, rescale_to_df = TRUE) {
  # BUG FIX: class(fit) == "poLCA" fails for objects carrying more than one
  # class attribute; inherits() is the robust S3 check.
  stopifnot(inherits(fit, "poLCA"))
  # The last two columns of predcell are the observed and expected counts;
  # everything before them names the manifest variables.
  ov_names <- names(fit$predcell)[1:(ncol(fit$predcell) - 2)]
  ov_combn <- combn(ov_names, 2)
  # Bivariate residual ("chi-square") for one pair of observed variables.
  get_bvr <- function(ov_pair) {
    form_obs <- as.formula(paste0("observed ~ ", ov_pair[1], " + ", ov_pair[2]))
    form_exp <- as.formula(paste0("expected ~ ", ov_pair[1], " + ", ov_pair[2]))
    counts_obs <- xtabs(form_obs, data = fit$predcell)
    counts_exp <- xtabs(form_exp, data = fit$predcell)
    counts_exp <- ifelse(counts_exp < tol, tol, counts_exp) # Prevent Inf/NaN
    # Degrees of freedom of the local crosstable: prod(dims - 1).
    bvr_df <- prod(dim(counts_exp) - 1)
    bvr_value <- sum((counts_obs - counts_exp)^2 / counts_exp)
    if (rescale_to_df) bvr_value <- bvr_value / bvr_df
    attr(bvr_value, "df") <- bvr_df
    bvr_value
  }
  bvr_pairs <- apply(ov_combn, 2, get_bvr)
  # Present the pairwise values as a "dist" object for compact triangular printing.
  attr(bvr_pairs, "rescale_to_df") <- rescale_to_df
  attr(bvr_pairs, "class") <- "dist"
  attr(bvr_pairs, "Size") <- length(ov_names)
  attr(bvr_pairs, "Labels") <- ov_names
  attr(bvr_pairs, "Diag") <- FALSE
  attr(bvr_pairs, "Upper") <- FALSE
  bvr_pairs
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Shannon entropy of the model-implied distribution over the full response
# pattern space of a poLCA fit.
# lc: a poLCA fit object (uses lc$probs and poLCA.predcell from the poLCA
# package). Returns -sum(p * log(p)) over all cells, NA cells ignored.
poLCA.entropy.fix <- function(lc)
{
  n_cats <- sapply(lc$probs, ncol)
  # All combinations of item response categories.
  pattern_grid <- expand.grid(sapply(n_cats, seq, from = 1))
  cell_probs <- poLCA.predcell(lc, pattern_grid)
  -sum(cell_probs * log(cell_probs), na.rm = TRUE)
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#Calculate entropy R2 for poLCA model
# MIT license
# Author: Daniel Oberski
# Input: result of a poLCA model fit
# Output: entropy R^2 statistic (Vermunt & Magidson, 2013, p. 71)
# See: daob.nl/wp-content/uploads/2015/07/ESRA-course-slides.pdf
# And: https://www.statisticalinnovations.com/wp-content/uploads/LGtecnical.pdf
# Threshold below which probabilities are treated as zero when computing
# entropy (lim_{p->0} p*log(p) = 0).
machine_tolerance <- sqrt(.Machine$double.eps)
# Entropy R^2 for a latent class fit (Vermunt & Magidson, 2013, p. 71):
# 1 minus the ratio of mean posterior entropy to prior class entropy.
# fit: object with $P (class proportions) and $posterior (n x k matrix).
entropy.R2 <- function(fit) {
  shannon <- function(probs) {
    kept <- probs[probs > machine_tolerance]
    -sum(kept * log(kept))
  }
  prior_entropy <- shannon(fit$P)                                # Class proportions
  posterior_entropy <- mean(apply(fit$posterior, 1, shannon))    # Per-case classification entropy
  (prior_entropy - posterior_entropy) / prior_entropy
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#http://researchdata.gla.ac.uk/879/1/Survey_data_processed_using_R.pdf
##Function to plot variable probabilites by latent class
## Function to undertake chisquare analayis and plot graphs of residuals and contributions
## Chi-square test of independence between item responses and predicted latent
## class membership, with corrplot displays of the residuals and of each cell's
## percentage contribution to the statistic.
## indfactor.data: item responses; predclass.data: predicted class per case.
## noclasses: number of classes (not used in the body — kept for call
## compatibility). pitem, gitem: labels for the plot titles.
## chirows, chicols: dimnames for the residual/contribution matrices.
## Returns the htest object from chisq.test(); plots are side effects.
## NOTE(review): corrplot must be attached by the caller — it is not among the
## packages loaded at the top of this document; verify.
chisquaretest.predictions.function <-
  function(indfactor.data,
           predclass.data,
           noclasses,
           pitem,
           gitem,
           chirows,
           chicols) {
    chisquare.results <- chisq.test(indfactor.data, predclass.data)
    residuals.data <- chisquare.results$residuals
    colnames(residuals.data) <- chicols
    rownames(residuals.data) <- chirows
    title.text <-
      paste(
        "Residuals: chi Square Crosstabulation of\n",
        pitem,
        "and",
        gitem,
        "\n(Chisquare =",
        round(chisquare.results$statistic, 3),
        " p <",
        round(chisquare.results$p.value, 3),
        ")",
        sep = " "
      )
    corrplot(
      residuals.data,
      is.cor = FALSE,
      title = title.text,
      mar = c(0, 0, 4, 0)
    )
    # Percentage contribution of each cell to the total chi-square statistic.
    contrib.data <-
      100 * residuals.data ^ 2 / chisquare.results$statistic
    # BUG FIX: the rounded value was previously computed but discarded
    # (no assignment), so the plot showed unrounded contributions.
    contrib.data <- round(contrib.data, 3)
    colnames(contrib.data) <- chicols
    rownames(contrib.data) <- chirows
    title.text <-
      paste(
        "Contributions: chi Square Crosstabulation of\n",
        pitem,
        "and",
        gitem,
        "\n(Chisquare =",
        round(chisquare.results$statistic, 3),
        " p <",
        round(chisquare.results$p.value, 3),
        ")",
        sep = " "
      )
    corrplot(
      contrib.data,
      is.cor = FALSE,
      title = title.text,
      mar = c(0, 0, 4, 0)
    )
    return(chisquare.results)
  }
##Funciton for Cramers V test
## Cramér's V (phi for 2x2 tables) measure of association between two
## categorical vectors, based on the uncorrected chi-square statistic.
## Returns the V statistic as a plain numeric.
## BUG FIX: the original had a stray closing parenthesis after
## print.noquote(...), which made the function definition a syntax error.
cv.test <- function(x, y) {
  CV <- sqrt(chisq.test(x, y, correct = FALSE)$statistic /
               (length(x) * (min(
                 length(unique(x)), length(unique(y))
               ) - 1)))
  print.noquote("Cramér V / Phi:")
  return(as.numeric(CV))
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Windows-only: report and raise the R memory limit.
# NOTE(review): memory.size()/memory.limit() are Windows-specific and defunct
# in R >= 4.2 — confirm the target R version before relying on this.
if(.Platform$OS.type == "windows") withAutoprint({
memory.size()
memory.size(TRUE)
memory.limit(size=56000)
})
# Directory of the active source file when run inside RStudio; try() lets this
# degrade gracefully (to a "try-error" object) outside RStudio.
path<-try(dirname(rstudioapi::getSourceEditorContext()$path))
# Render NA as an empty string in knitr::kable tables.
options(knitr.kable.NA = '')
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# knitr chunk hook that reports each chunk's wall-clock run time as an HTML
# message div, in hours if it exceeded one hour and in minutes otherwise.
# BUG FIX: the original compared a unit-less difftime() against 60^2, so the
# threshold silently depended on whichever units difftime auto-selected
# (secs/mins/hours); elapsed time is now pinned to seconds. The scalar
# ifelse() calls were also replaced by plain if/else.
knitr::knit_hooks$set(time_it = local({
  now <- NULL
  function(before, options) {
    if (before) {
      # record the current time before each chunk
      now <<- Sys.time()
    } else {
      # calculate the time difference after a chunk, in seconds
      elapsed_secs <- as.numeric(difftime(Sys.time(), now, units = "secs"))
      if (elapsed_secs > 3600) {
        x <- paste("Time for this code chunk to run:", round(elapsed_secs / 3600, 1), "hours")
      } else {
        x <- paste("Time for this code chunk to run:", round(elapsed_secs / 60, 1), "minutes")
      }
      # return a character string to show the time
      paste('<div class="message">', gsub('##', '\n', x), '</div>', sep = '\n')
    }
  }
}))
knitr::opts_chunk$set(time_it = TRUE)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#to format rows in bold
#to format rows in bold
# Wrap selected cells of a data.frame in Markdown emphasis markers.
# df: data.frame; rows, cols: indices of the cells to format;
# value: one of "italics" (*), "bold" (**) or "strikethrough" (~~).
# Empty cells are left empty; non-empty cells are space-stripped and wrapped.
# Returns the modified data.frame (targeted columns coerced to character).
format_cells <- function(df, rows ,cols, value = c("italics", "bold", "strikethrough")){
  # BUG FIX: without match.arg() the default left `value` as a length-3 vector,
  # making `markup` length 3 and breaking the single-cell assignment below.
  value <- match.arg(value)
  # select the correct markup
  # one * for italics, two ** for bold
  map <- setNames(c("*", "**", "~~"), c("italics", "bold", "strikethrough"))
  markup <- map[value]
  for (r in rows){
    for(c in cols){
      # Make sure values are not factors
      df[[c]] <- as.character( df[[c]])
      # Update formatting
      df[r, c] <- ifelse(nchar(df[r, c])==0,"",paste0(markup, gsub(" ", "", df[r, c]), markup))
    }
  }
  return(df)
}
#To produce line breaks in messages and warnings
# To produce line breaks in messages and warnings: render knitr errors,
# warnings and messages as Bootstrap-style HTML divs, converting the "##"
# output prefixes into line breaks.
# BUG FIX: the original regexes '^##\ Error' / '^##\ Warning:' used the
# invalid string escape "\ " (a parse error in R); a plain space is intended.
knitr::knit_hooks$set(
  error = function(x, options) {
    paste('\n\n<div class="alert alert-danger">',
          gsub('##', '\n', gsub('^## Error', '**Error**', x)),
          '</div>', sep = '\n')
  },
  warning = function(x, options) {
    paste('\n\n<div class="alert alert-warning">',
          gsub('##', '\n', gsub('^## Warning:', '**Warning**', x)),
          '</div>', sep = '\n')
  },
  message = function(x, options) {
    paste('<div class="message">',
          gsub('##', '\n', x),
          '</div>', sep = '\n')
  }
)
#
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Coerce a tableone::TableOne object to a data.frame by capturing its printed
# output, then carrying each row label forward so every row is labelled.
# NOTE(review): "charactersitic" is a typo, but it becomes the output column
# name — renaming it would change the result schema for downstream code.
as.data.frame.TableOne <- function(x, ...) {capture.output(print(x,
showAllLevels = TRUE, ...) -> x)
y <- as.data.frame(x)
# print() leaves blanks where a label repeats; convert them to NA so fill()
# below can propagate the label downward.
y$charactersitic <- dplyr::na_if(rownames(x), "")
y <- y %>%
fill(charactersitic, .direction = "down") %>%
select(charactersitic, everything())
rownames(y) <- NULL
y}
Definimos ciertas constantes
# Estimation constants for the LCA runs.
clus_iter <- 500                       # random starts per model
n_thread <- parallel::detectCores()    # worker threads available
nrep <- clus_iter                      # number of different initial values (could be n_thread too)
n_class_max <- 12                      # maximum number of classes to investigate
n_bootstrap <- 100                     # number of bootstrap samples
print(n_thread)
[1] 8
library(DiagrammeR) #⋉
# Build the study flow diagram. NOTE(review): the Graphviz DOT specification
# was elided in this extract (placeholder below) — restore from the original.
gr_lca3<-
DiagrammeR::grViz([1079 chars quoted with '"'])#, width = 1200, height = 900
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
# Export settings: 1200 DPI on a 21 x 8 cm canvas (cm -> inches via /2.54).
DPI = 1200
WidthCM = 21
HeightCM = 8
# Export the diagram as PDF, high-resolution PNG, standalone HTML, and a
# webshot PNG of that HTML.
gr_lca3 %>%
export_svg %>% charToRaw %>% rsvg_pdf("_flowchart_lca_exp.pdf")
gr_lca3 %>% export_svg()%>%charToRaw %>% rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("_flowchart_lca0_exp.png")
htmlwidgets::saveWidget(gr_lca3, "_flowchart_lca_exp.html")
webshot::webshot("_flowchart_lca_exp.html", "_flowchart_lca_exp.png",vwidth = 1200, vheight = 900,
zoom = 2)
Figure 1: Gráfico esquemático
Los valores cero se declararon como datos perdidos.
#table(data2$MACROZONA,data2$REGION_RESIDENCIA, exclude=NULL)
# Indicator variables for the latent class models.
preds <- c("CAUSAL","EDAD_MUJER_REC", "PUEBLO_ORIGINARIO_REC","PAIS_ORIGEN_REC", "HITO1_EDAD_GEST_SEM_REC", "MACROZONA", "PREV_TRAMO_REC")
#c("AÑO","MES_NUM", "HITO3_COMPLICACION_POST_IVE", "HITO3_CONDICION_MUJER_POST_IVE", "HITO3_TIPO_ATENCION", "HITO3_SERV_SALUD_ESTABLECIMIENTO", "HITO4_MUJER_ACEPTA_ACOM"),
# Recode each indicator to consecutive integers starting at 2, reserving
# category 1 for missing values (LCA items need positive integers, no NA).
# FIX: wrap the external `preds` vector in all_of() (bare vectors inside
# across() are deprecated by tidyselect) and use TRUE instead of T.
mydata_preds <- data2 %>%
  dplyr::mutate(across(dplyr::all_of(preds), ~ as.numeric(factor(.)) + 1)) %>%
  dplyr::mutate(across(dplyr::all_of(preds), ~ dplyr::case_when(is.na(.) ~ 1, TRUE ~ .)))
#comprobar
#table(mydata_preds$sus_ini_mod_mvv, data2$sus_ini_mod_mvv, exclude=NULL)
# Measurement model formula for glca(): items only, no covariates.
f_preds <- item(CAUSAL, EDAD_MUJER_REC, PUEBLO_ORIGINARIO_REC, PAIS_ORIGEN_REC,HITO1_EDAD_GEST_SEM_REC, MACROZONA, PREV_TRAMO_REC) ~ 1
Modificamos la base de datos para incluir la variable resultado.
#dropped region name because it was too different,many categories
# Outcome-augmented copy: same recoding as `mydata_preds`, plus a binary
# `outcome` flagging decisions whose text contains 'INTERRUMPIR'.
# FIX: all_of()/any_of() for external selection vectors (tidyselect
# deprecation) and TRUE instead of T.
preds2 <-c(setdiff(preds,""),"HITO2_DECISION_MUJER_IVE")
mydata_preds2 <- data2 %>%
  dplyr::mutate(across(dplyr::all_of(preds), ~ as.numeric(factor(.)) + 1)) %>%
  dplyr::mutate(across(dplyr::all_of(preds), ~ dplyr::case_when(is.na(.) ~ 1, TRUE ~ .))) %>%
  dplyr::select(dplyr::any_of(preds2)) %>%
  dplyr::mutate(outcome = factor(as.numeric(grepl('INTERRUMPIR', HITO2_DECISION_MUJER_IVE))))
#data.table::data.table(.)
#
#table(mydata_preds2$HITO2_DECISION_MUJER_IVE)
#lapply(preds2, function(p) {prop.table(table(mydata_preds2[p]))})
Definimos los datos
# Final analysis dataset handed to glca(), as a data.table.
# (Indicators intentionally stay numeric: converting them to character
# produced errors downstream.)
mydata_preds3 <- data.table::data.table(mydata_preds2)
Así se ven los datos como los usa glca
Corremos glca.
#Biemer, P. P., & Wiesen, C. (2002). Measurement error evaluation of self-reported drug use: a latent class analysis of the US National Household Survey on Drug Abuse. Journal of the Royal Statistical Society: Series A (Statistics in Society), 165(1), 97–119. doi:10.1111/1467-985x.00612
#lca_entropia(x="ppio", seed= 2125, k= 8, f= f_preds, dat= mydata_preds, nbr_repet= 30, na_rm= T)
#3
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
# f is the selected variables
# dat is the data
# nb_var is the number of selected variables
# k is the number of latent class generated
# nbr_repet is the number of repetition to
# reach the convergence of EM algorithm
# x es el código para las variables de los modelos
#seed es el numero random para las semillas. ej: 4345.
#Modo de calcular el mejor modelo.
#z_ #
#2023-01-20
#https://github.com/QMUL/poLCAParallel/blob/master/exec/3_blrt.R
#0h s
# Fixed seed so the random starts of every model are reproducible.
seed<-2125
old <- Sys.time()
# Fit exploratory latent class models with 2..10 classes: 500 random starts
# (n.init), up to 10000 EM iterations, identical settings apart from nclass.
lca2 <- glca(f_preds, data = mydata_preds3, nclass = 2, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca3 <- glca(f_preds, data = mydata_preds3, nclass = 3, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca4 <- glca(f_preds, data = mydata_preds3, nclass = 4, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca5 <- glca(f_preds, data = mydata_preds3, nclass = 5, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca6 <- glca(f_preds, data = mydata_preds3, nclass = 6, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca7 <- glca(f_preds, data = mydata_preds3, nclass = 7, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca8 <- glca(f_preds, data = mydata_preds3, nclass = 8, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca9 <- glca(f_preds, data = mydata_preds3, nclass = 9, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca10 <- glca(f_preds, data = mydata_preds3, nclass = 10, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
new_med<-(Sys.time())
# NOTE(review): new_med - old is a difftime with auto-selected units, so the
# message omits units ("35.71" could be secs or mins) — consider
# difftime(..., units =) plus an explicit unit label.
paste0("The model took ",round(new_med-old,2)," until every LCA was computed")
[1] "The model took 35.71 until every LCA was computed"
Luego calculamos la razón de verosimilitud mediante remuestreo bootstrap (BLRT) entre los distintos modelos con el que asume una clase menos.
Hicimos un gráfico de los resultados
# Plot AIC/CAIC/BIC across the 2-10 class models.
# BUG FIX: the labels3 assignment and the labs() call were syntactically
# garbled (stray quotes, `==`, `))++`); reconstructed from the evident intent
# (seven labels matching the seven levels, one label string per aesthetic).
manualcolors <- c('indianred1', 'cornflowerblue', 'gray50', 'darkolivegreen4', 'slateblue2',
                  'firebrick4', 'goldenrod4')
levels3 <- c("logLik", "Gsq", "AIC", "CAIC", "BIC", "entropy", "Res.Df")
labels3 <- c('Log-Verosimilitud', 'Chi2', 'Criterio de Información\nde Akaike(AIC)',
             'AIC Corregido', 'Criterio de Información\nBayesiano (BIC)',
             'Entropía', 'Grados de libertad residuales')
fig_lca_fit3 <- cbind.data.frame(rn = 2:10, gof$gtable) %>%
  data.frame() %>%
  dplyr::mutate_if(is.character, as.numeric) %>% # convert character columns to numeric
  tidyr::pivot_longer(cols = -rn, names_to = "indices", values_to = "value", values_drop_na = FALSE) %>%
  dplyr::mutate(indices = factor(indices, levels = levels3, labels = labels3)) %>%
  # keep only the information criteria
  dplyr::filter(grepl("(AIC|BIC)", indices, ignore.case = TRUE)) %>%
  dplyr::mutate(ModelIndex = factor(rn, levels = 2:10)) %>%
  ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
  geom_line(size = 1.5) +
  scale_color_manual(values = manualcolors) +
  #scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  labs(x = "Número de clases", y = "Valor", color = "Medida", linetype = "Medida") +
  #facet_wrap(.~indices, scales = "free_y", nrow = 4, ncol = 1) +
  theme_bw()
fig_lca_fit3
ggsave("_fig1_comparison_glca.png", fig_lca_fit3, dpi = 600)
#International Journal of Workplace Health Management (Zhang et al., 2018).
Luego en una tabla
# Fit-index table for the 2-10 class models, rendered as Markdown.
fit_indices <- cbind.data.frame(rn = 2:10, gof$gtable)
fit_indices %>%
  dplyr::select(rn, dplyr::everything()) %>%
  # convert character columns to numeric
  dplyr::mutate(dplyr::across(dplyr::where(is.character), as.numeric)) %>%
  knitr::kable(format = "markdown", caption = "Índices de ajuste modelos")
| rn | logLik | AIC | CAIC | BIC | entropy | Res.Df | Gsq |
|---|---|---|---|---|---|---|---|
| 2 | -27726.02 | 55554.04 | 55923.27 | 55872.27 | 0.9479098 | 3737 | 5523.627 |
| 3 | -27455.05 | 55064.10 | 55621.57 | 55544.57 | 0.8258576 | 3711 | 4981.690 |
| 4 | -27265.29 | 54736.58 | 55482.28 | 55379.28 | 0.7735804 | 3685 | 4602.167 |
| 5 | -27106.74 | 54471.48 | 55405.43 | 55276.43 | 0.8380111 | 3659 | 4285.075 |
| 6 | -26968.63 | 54247.26 | 55369.44 | 55214.44 | 0.8409540 | 3633 | 4008.856 |
| 7 | -26835.60 | 54033.20 | 55343.61 | 55162.61 | 0.8519267 | 3607 | 3742.786 |
| 8 | -26765.23 | 53944.45 | 55443.10 | 55236.10 | 0.8726149 | 3581 | 3602.044 |
| 9 | -26715.10 | 53896.20 | 55583.09 | 55350.09 | 0.8595011 | 3555 | 3501.795 |
| 10 | -26667.36 | 53852.72 | 55727.84 | 55468.84 | 0.8188851 | 3529 | 3406.313 |
# Class count of the best-fitting model (lowest BIC). `rn` runs 2..10, so the
# row index of the minimum plus one recovers the number of classes.
gof_tbl <- cbind.data.frame(rn = 2:10, gof$gtable)
best_model <- as.numeric(dplyr::summarise(gof_tbl, which.min(BIC) + 1))
Presentamos el modelo con mejor ajuste
Call:
glca(formula = f_preds, data = mydata_preds3, nclass = 7, n.init = 500,
decreasing = T, testiter = 500, maxiter = 10000, seed = seed,
verbose = FALSE)
Manifest items : CAUSAL EDAD_MUJER_REC PUEBLO_ORIGINARIO_REC PAIS_ORIGEN_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PREV_TRAMO_REC
Categories for manifest items :
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
CAUSAL 2 3 4
EDAD_MUJER_REC 1 2 3 4 5 6
PUEBLO_ORIGINARIO_REC 1 2 3
PAIS_ORIGEN_REC 1 2 3
HITO1_EDAD_GEST_SEM_REC 1 2 3 4 5 6
MACROZONA 1 2 3 4 5 6
PREV_TRAMO_REC 1 2 3 4 5
Model : Latent class analysis
Number of latent classes : 7
Number of observations : 3789
Number of parameters : 181
log-likelihood : -26835.6
G-squared : 3742.786
AIC : 54033.2
BIC : 55162.61
Marginal prevalences for latent classes :
Class 1 Class 2 Class 3 Class 4 Class 5 Class 6 Class 7
0.19017 0.04980 0.09476 0.34128 0.12731 0.04374 0.15293
Class prevalences by group :
Class 1 Class 2 Class 3 Class 4 Class 5 Class 6 Class 7
ALL 0.19017 0.0498 0.09476 0.34128 0.12731 0.04374 0.15293
Item-response probabilities :
CAUSAL
Y = 1 Y = 2 Y = 3
Class 1 0.9688 0.0312 0.0000
Class 2 0.7077 0.2923 0.0000
Class 3 0.3454 0.6546 0.0000
Class 4 0.1273 0.8727 0.0000
Class 5 0.0756 0.9244 0.0000
Class 6 0.0556 0.0000 0.9444
Class 7 0.0086 0.0000 0.9914
EDAD_MUJER_REC
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
Class 1 0.0006 0.0106 0.2417 0.4167 0.2945 0.0359
Class 2 0.0684 0.0360 0.2950 0.3044 0.2476 0.0486
Class 3 0.0000 0.0170 0.2900 0.3167 0.3140 0.0623
Class 4 0.0017 0.0414 0.2874 0.3261 0.2783 0.0651
Class 5 0.0030 0.0017 0.0870 0.2979 0.5370 0.0733
Class 6 0.0000 0.1794 0.3665 0.2473 0.1770 0.0299
Class 7 0.0017 0.3681 0.2947 0.2092 0.1071 0.0192
PUEBLO_ORIGINARIO_REC
Y = 1 Y = 2 Y = 3
Class 1 0.0859 0.8644 0.0496
Class 2 1.0000 0.0000 0.0000
Class 3 0.1255 0.8373 0.0372
Class 4 0.1239 0.8184 0.0577
Class 5 0.1401 0.8599 0.0000
Class 6 0.1290 0.8337 0.0373
Class 7 0.1521 0.8030 0.0449
PAIS_ORIGEN_REC
Y = 1 Y = 2 Y = 3
Class 1 0.0000 0.8801 0.1199
Class 2 0.0838 0.8285 0.0876
Class 3 0.0061 0.0000 0.9939
Class 4 0.0000 0.9887 0.0113
Class 5 0.0000 0.9268 0.0732
Class 6 0.0000 0.0000 1.0000
Class 7 0.0000 0.9922 0.0078
HITO1_EDAD_GEST_SEM_REC
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
Class 1 0.0754 0.2027 0.2229 0.4909 0.0000 0.0081
Class 2 0.0500 0.0000 0.0914 0.2432 0.3096 0.3058
Class 3 0.0077 0.0071 0.3264 0.3672 0.2250 0.0666
Class 4 0.0063 0.0000 0.3262 0.3622 0.2232 0.0821
Class 5 0.0080 0.0040 0.6071 0.2574 0.1040 0.0195
Class 6 0.0202 0.7950 0.1848 0.0000 0.0000 0.0000
Class 7 0.0088 0.7795 0.2117 0.0000 0.0000 0.0000
MACROZONA
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
Class 1 0.0000 0.3995 0.1653 0.1662 0.1128 0.1562
Class 2 0.0311 0.1991 0.3146 0.1668 0.0996 0.1888
Class 3 0.0000 0.6108 0.0844 0.0372 0.2360 0.0316
Class 4 0.0009 0.3192 0.1827 0.2115 0.0888 0.1970
Class 5 0.0000 0.6956 0.1042 0.0629 0.0663 0.0711
Class 6 0.0060 0.5570 0.0803 0.0159 0.2994 0.0413
Class 7 0.0052 0.3843 0.1750 0.1448 0.0859 0.2048
PREV_TRAMO_REC
Y = 1 Y = 2 Y = 3 Y = 4 Y = 5
Class 1 0.0000 0.0915 0.5565 0.3520 0.0000
Class 2 0.0276 0.0370 0.6457 0.2347 0.0550
Class 3 0.0048 0.0000 0.6095 0.2880 0.0977
Class 4 0.0008 0.0032 0.6439 0.3521 0.0000
Class 5 0.0000 0.7662 0.0349 0.1908 0.0082
Class 6 0.0300 0.0043 0.5214 0.1636 0.2808
Class 7 0.0017 0.0701 0.7223 0.2006 0.0053
# Persist the full workspace (fitted models included) for later reuse.
save.image("data2_lca23.RData")
library(tidyverse)
# Interactive table of the packages used in this session.
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>%
  DT::datatable(filter = 'top',
                # BUG FIX: headers previously read 'Variable'/'Percentage'
                # (copied from another table); the columns are the package
                # name, its loaded version, and its installation source.
                colnames = c('Row number' = 1, 'Package' = 2, 'Version' = 3, 'Source' = 4),
                caption = htmltools::tags$caption(
                  style = 'caption-side: top; text-align: left;',
                  '', htmltools::em('Packages')),
                options = list(
                  initComplete = htmlwidgets::JS(
                    "function(settings, json) {",
                    "$(this.api().tables().body()).css({
'font-family': 'Helvetica Neue',
'font-size': '50%',
'code-inline-font-size': '15%',
'white-space': 'nowrap',
'line-height': '0.75em',
'min-height': '0.5em'
});",
                    "}")))