Paso 2.15

Análisis de clases latentes exploratoria y comparativa sin predictores (glca), sin pueblo originario y sin año y recategorización de edad mujer y semana gestacional hito 1

Andrés González Santa Cruz
May 14, 2023
Show code
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"
Show code
 $(document).ready(function() {
    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
    // onClick function for all plots (img's)
    $('img:not(.zoomImg)').click(function() {
      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
      $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
    });
    // onClick function for zoomImg
    $('img.zoomImg').click(function() {
      $('.zoomDiv').css({opacity: '0', width: '0%'}); 
    });
  });
  
Show code
<script src="hideOutput.js"></script> 
Show code
$(document).ready(function() {    
    $chunks = $('.fold');    
    $chunks.each(function () {      // add button to source code chunks     
    if ( $(this).hasClass('s') ) {       
        $('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
            $('pre.r', this).children('code').attr('class', 'folded');     
            }      // add button to output chunks     
        if ( $(this).hasClass('o') ) {       
            $('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");       
            $('pre:not(.r)', this).children('code:not(r)').addClass('folded');        // add button to plots       
            $(this).find('img').wrap('<pre class=\"plot\"></pre>');       
            $('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");       
            $('pre.plot', this).children('img').addClass('folded');      
            }   
});    // hide all chunks when document is loaded   
    $('.folded').css('display', 'none')    // function to toggle the visibility   
    $('.showopt').click(function() {     
            var label = $(this).html();     
            if (label.indexOf("Show") >= 0) {       
                $(this).html(label.replace("Show", "Hide"));     
            } else {
              $(this).html(label.replace("Hide", "Show"));     
            }     
    $(this).siblings('code, img').slideToggle('fast', 'swing');   
    }); 
}); 

Cargamos los datos

Show code
rm(list = ls());gc()
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  653084 34.9    1333251 71.3   897868 48.0
Vcells 1172951  9.0    8388608 64.0  2075161 15.9
Show code
load("data2.RData")

Cargamos los paquetes

Show code
knitr::opts_chunk$set(echo = TRUE)

if(!require(poLCA)){install.packages("poLCA")}
if(!require(poLCAParallel)){devtools::install_github("QMUL/poLCAParallel@package")}
if(!require(compareGroups)){install.packages("compareGroups")}
if(!require(parallel)){install.packages("parallel")}
if(!require(Hmisc)){install.packages("Hmisc")}
if(!require(tidyverse)){install.packages("tidyverse")}
try(if(!require(sjPlot)){install.packages("sjPlot")})
if(!require(emmeans)){install.packages("emmeans")}
if(!require(nnet)){install.packages("nnet")}
if(!require(here)){install.packages("here")}
if(!require(doParallel)){install.packages("doParallel")}
if(!require(progress)){install.packages("progress")}
if(!require(caret)){install.packages("caret")}
if(!require(rpart)){install.packages("rpart")}
if(!require(rpart.plot)){install.packages("rpart.plot")}
if(!require(partykit)){install.packages("partykit")}
if(!require(randomForest)){install.packages("randomForest")}
if(!require(ggcorrplot)){install.packages("ggcorrplot")}
if(!require(polycor)){install.packages("polycor")}
if(!require(tableone)){install.packages("tableone")}
if(!require(DiagrammeR)){install.packages("DiagrammeR")}
if(!require(rsvg)){install.packages("rsvg")}
if(!require(DiagrammeRsvg)){install.packages("DiagrammeRsvg")}
if(!require(webshot)){install.packages("webshot")}
if(!require(htmlwidgets)){install.packages("htmlwidgets")}
if(!require(glca)){install.packages("glca")}

#if(!require(poLCA)){githubinstall::gh_install_packages("poLCA", ref = github_pull("14"))}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
lca_dir<-here::here()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#  
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

tryNA <- function(x){
    x <- try(x)
    if(inherits(x,'try-error')) return(NA)
    x
}

#https://rdrr.io/github/hyunsooseol/snowRMM/src/R/lca.b.R
#https://github.com/dlinzer/poLCA/issues/7

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#' Bivariate residuals for latent class models
#' 
#' Calculate the "bivariate residuals" (BVRs) between pairs of variables 
#' in a latent class model.
#' 
#' This function compares the model-implied (expected) counts in the crosstables
#' of all pairs of observed dependent variables to the observed counts. For each
#' pair, it calculates a "chi-square" statistic,
#' 
#' \deqn{\text{BVR} = \sum_{j, j'} \frac{(n_{jj'} - e_{jj'})^2}{e_{jj'}}},
#' 
#' where \eqn{n_{jj'}} are the observed counts for categories \eqn{j} and \eqn{j'} 
#' of the variables being crosstabulated, and \eqn{e_{jj'}} are
#' the expected counts under the latent class model. 
#' 
#' Note that the BVR does not follow an asymptotic chi-square distribution and
#' for accurate p-values, parametric bootstrapping is necessary (Oberski et al. 2013).
#' 
#' @param fit A poLCA fit object
#' @param tol Optional: tolerance for small expected counts
#' @param rescale_to_df Optional: whether to divide the pairwise "chi-square" values by 
#' the degrees of freedom of the local crosstable. Default is TRUE.
#' @return The table of bivariate residuals
#' @author Daniel Oberski (daniel.oberski@gmail.com)
#' @seealso \code{\link{poLCA}} for fitting the latent class model.
#' @references 
#' Oberski, DL, Van Kollenburg, GH and Vermunt, JK (2013). 
#'   A Monte Carlo evaluation of three methods to detect local dependence in binary data latent class models. 
#'   Advances in Data Analysis and Classification 7 (3), 267-279.
#' @examples
#' data(values)
#' f <- cbind(A, B, C, D) ~ 1
#' M0 <- poLCA(f,values, nclass=1, verbose = FALSE) 
#' bvr(M0) # 12.4, 5.7, 8.3, 15.6, ... 
bvr <- function(fit, tol = 1e-3, rescale_to_df = TRUE) {
  stopifnot(class(fit) == "poLCA")

  ov_names <- names(fit$predcell)[1:(ncol(fit$predcell) - 2)]
  ov_combn <- combn(ov_names, 2)

  get_bvr <- function(ov_pair) {
    form_obs <- as.formula(paste0("observed ~ ", ov_pair[1], " + ", ov_pair[2]))
    form_exp <- as.formula(paste0("expected ~ ", ov_pair[1], " + ", ov_pair[2]))

    counts_obs <- xtabs(form_obs, data = fit$predcell)
    counts_exp <- xtabs(form_exp, data = fit$predcell)
    counts_exp <- ifelse(counts_exp < tol, tol, counts_exp) # Prevent Inf/NaN

    bvr_df <- prod(dim(counts_exp) - 1)
    bvr_value <- sum((counts_obs - counts_exp)^2 / counts_exp)

    if(rescale_to_df) bvr_value <- bvr_value / bvr_df

    attr(bvr_value, "df") <- bvr_df

    bvr_value
  }

  bvr_pairs <- apply(ov_combn, 2, get_bvr)

  attr(bvr_pairs, "rescale_to_df") <- rescale_to_df
  attr(bvr_pairs, "class") <- "dist"
  attr(bvr_pairs, "Size") <- length(ov_names)
  attr(bvr_pairs, "Labels") <- ov_names
  attr(bvr_pairs, "Diag") <- FALSE
  attr(bvr_pairs, "Upper") <- FALSE

  bvr_pairs
}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
poLCA.entropy.fix <- function (lc)
{
  K.j <- sapply(lc$probs, ncol)
  fullcell <- expand.grid(sapply(K.j, seq, from = 1))
  P.c <- poLCA.predcell(lc, fullcell)
  return(-sum(P.c * log(P.c), na.rm = TRUE))
}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#Calculate entropy R2 for poLCA model

# MIT license
# Author: Daniel Oberski
# Input: result of a poLCA model fit
# Output: entropy R^2 statistic (Vermunt & Magidson, 2013, p. 71)
# See: daob.nl/wp-content/uploads/2015/07/ESRA-course-slides.pdf
# And: https://www.statisticalinnovations.com/wp-content/uploads/LGtecnical.pdf
machine_tolerance <- sqrt(.Machine$double.eps)
entropy.R2 <- function(fit) {
  entropy <- function(p) {
    p <- p[p > machine_tolerance] # since Lim_{p->0} p log(p) = 0
    sum(-p * log(p))
  }
  error_prior <- entropy(fit$P) # Class proportions
  error_post <- mean(apply(fit$posterior, 1, entropy))
  R2_entropy <- (error_prior - error_post) / error_prior
  R2_entropy
}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#http://researchdata.gla.ac.uk/879/1/Survey_data_processed_using_R.pdf
##Function to plot variable probabilites by latent class

## Function to undertake chisquare analayis and plot graphs of residuals and contributions
chisquaretest.predictions.function <-
 function(indfactor.data,
   predclass.data,
   noclasses,
   pitem,
   gitem,
   chirows,
   chicols) {
   chisquare.results <- chisq.test(indfactor.data, predclass.data)
   residuals.data <- chisquare.results$residuals
   colnames(residuals.data) <- chicols
   rownames(residuals.data) <- chirows
     title.text <-
       paste(
       "Residuals: chi Square Crosstabulation of\n",
       pitem,
       "and",
       gitem,
       "\n(Chisquare =",
       round(chisquare.results$statistic, 3),
       " p <",
       round(chisquare.results$p.value, 3),
       ")",
       sep = " "
       )
     corrplot(
       residuals.data,
       is.cor = FALSE,
       title = title.text,
       mar = c(0, 0, 4, 0)
       )
     contrib.data <-
     100 * residuals.data ^ 2 / chisquare.results$statistic
     round(contrib.data, 3)
     colnames(contrib.data) <- chicols
     rownames(contrib.data) <- chirows
     title.text <-
     paste(
       "Contributions: chi Square Crosstabulation of\n",
       pitem,
       "and",
       gitem,
       "\n(Chisquare =",
       round(chisquare.results$statistic, 3),
       " p <",
       round(chisquare.results$p.value, 3),
       ")",
       sep = " "
       )
     corrplot(
       contrib.data,
       is.cor = FALSE,
       title = title.text,
       mar = c(0, 0, 4, 0)
       )
     return(chisquare.results)
 }
##Funciton for Cramers V test
cv.test = function(x, y) {
   CV = sqrt(chisq.test(x, y, correct = FALSE)$statistic /
   (length(x) * (min(
   length(unique(x)), length(unique(y))
   ) - 1)))
   print.noquote("Cramér V / Phi:"))
   return(as.numeric(CV))
  }

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

if(.Platform$OS.type == "windows") withAutoprint({
  memory.size()
  memory.size(TRUE)
  memory.limit(size=56000)
})

path<-try(dirname(rstudioapi::getSourceEditorContext()$path))

options(knitr.kable.NA = '')


#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

knitr::knit_hooks$set(time_it = local({
  now <- NULL
  function(before, options) {
    if (before) {
      # record the current time before each chunk
      now <<- Sys.time()
    } else {
      # calculate the time difference after a chunk
      res <- ifelse(difftime(Sys.time(), now)>(60^2),difftime(Sys.time(), now)/(60^2),difftime(Sys.time(), now)/(60^1))
      # return a character string to show the time
      x<-ifelse(difftime(Sys.time(), now)>(60^2),paste("Time for this code chunk to run:", round(res,1), "hours"),paste("Time for this code chunk to run:", round(res,1), "minutes"))
      paste('<div class="message">', gsub('##', '\n', x),'</div>', sep = '\n')
    }
  }
}))
knitr::opts_chunk$set(time_it = TRUE)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

#to format rows in bold
format_cells <- function(df, rows ,cols, value = c("italics", "bold", "strikethrough")){

  # select the correct markup
  # one * for italics, two ** for bold
  map <- setNames(c("*", "**", "~~"), c("italics", "bold", "strikethrough"))
  markup <- map[value]  

  for (r in rows){
    for(c in cols){

      # Make sure values are not factors
      df[[c]] <- as.character( df[[c]])

      # Update formatting
      df[r, c] <- ifelse(nchar(df[r, c])==0,"",paste0(markup, gsub(" ", "", df[r, c]), markup))
    }
  }

  return(df)
}
#To produce line breaks in messages and warnings
knitr::knit_hooks$set(
   error = function(x, options) {
     paste('\n\n<div class="alert alert-danger">',
           gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
           '</div>', sep = '\n')
   },
   warning = function(x, options) {
     paste('\n\n<div class="alert alert-warning">',
           gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
           '</div>', sep = '\n')
   },
   message = function(x, options) {
     paste('<div class="message">',
           gsub('##', '\n', x),
           '</div>', sep = '\n')
   }
)
#
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

as.data.frame.TableOne <- function(x, ...) {capture.output(print(x,
                          showAllLevels = TRUE, ...) -> x)
  y <- as.data.frame(x)
  y$charactersitic <- dplyr::na_if(rownames(x), "")
  y <- y %>%
  fill(charactersitic, .direction = "down") %>%
  select(charactersitic, everything())
  rownames(y) <- NULL
  y}

Definimos ciertas constantes

Show code
clus_iter= 500#500 #500
n_thread <- parallel::detectCores()
nrep <- clus_iter # number of different initial values (could be n_thread too)
n_class_max <- 10 # maximum number of classes to investigate
n_bootstrap <- 100#00 #30 # 50 number of bootstrap samples
print(n_thread)
[1] 8

Time for this code chunk to run: 0 minutes

Análisis de clases latentes

Especificar modelo

Los valores ceros se declararon datos perdidos.

Show code
#table(data2$MACROZONA,data2$REGION_RESIDENCIA, exclude=NULL)
preds <- c("CAUSAL","EDAD_MUJER_REC", "PUEBLO_ORIGINARIO_REC","PAIS_ORIGEN_REC", "HITO1_EDAD_GEST_SEM_REC", "MACROZONA", "PREV_TRAMO_REC")

#c("AÑO","MES_NUM",    "HITO3_COMPLICACION_POST_IVE", "HITO3_CONDICION_MUJER_POST_IVE", "HITO3_TIPO_ATENCION",  "HITO3_SERV_SALUD_ESTABLECIMIENTO", "HITO4_MUJER_ACEPTA_ACOM"),

mydata_preds <- data2


#comprobar 
#table(mydata_preds$sus_ini_mod_mvv, data2$sus_ini_mod_mvv, exclude=NULL)

f_preds <- item(CAUSAL, EDAD_MUJER_REC, PAIS_ORIGEN_REC,HITO1_EDAD_GEST_SEM_REC, MACROZONA, PREV_TRAMO_REC) ~ 1

Time for this code chunk to run: 0 minutes

Exploración

Modificamos la base de datos para incluir la variable resultado.

Show code
#dropped region name because it was too different,many categories
preds2 <-c(setdiff(preds,""),"HITO2_DECISION_MUJER_IVE")  

mydata_preds2 <- data2%>% 
    dplyr::mutate(EDAD_MUJER_REC= factor(dplyr::case_when(EDAD_MUJER<18~"1.<18", EDAD_MUJER>=18 & EDAD_MUJER<25~"2.18-24", EDAD_MUJER>=25 & EDAD_MUJER<35~"3.25-35",EDAD_MUJER>=35~"4.>=35")), HITO1_EDAD_GEST_SEM_REC= factor(dplyr::case_when(HITO1_EDAD_GESTACIONAL_SEMANAS<14~"1.0-13 semanas", HITO1_EDAD_GESTACIONAL_SEMANAS>=14 & HITO1_EDAD_GESTACIONAL_SEMANAS<28~"2.14-27 semanas", HITO1_EDAD_GESTACIONAL_SEMANAS>=28~ "3.>=28 semanas")))%>% dplyr::mutate(across(preds, ~ as.numeric(factor(.))+1))%>% dplyr::mutate(across(preds, ~ dplyr::case_when(is.na(.)~ 1, T~ .)))  %>% dplyr::select(any_of(preds2)) %>% dplyr::mutate(outcome= factor(as.numeric(grepl('INTERRUMPIR', HITO2_DECISION_MUJER_IVE)))) 

Time for this code chunk to run: 0.1 minutes

Recodificación datos

Definimos los datos

Show code
mydata_preds3 <-
  mydata_preds2 %>% 
#dplyr::mutate_if(is.numeric, ~as.character(.)) %>% #si convierto a caracter entrega errores
  data.table::data.table(.)

Time for this code chunk to run: 0 minutes

Así se ven los datos como los usa glca

Corremos glca.

Análisis

Show code
#Biemer, P. P., & Wiesen, C. (2002). Measurement error evaluation of self-reported drug use: a latent class analysis of the US National Household Survey on Drug Abuse. Journal of the Royal Statistical Society: Series A (Statistics in Society), 165(1), 97–119. doi:10.1111/1467-985x.00612  
#lca_entropia(x="ppio", seed= 2125, k= 8, f= f_preds, dat= mydata_preds, nbr_repet= 30, na_rm= T)
#3
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
# f is the selected variables
# dat is the data
# nb_var is the number of selected variables
# k is the number of latent class generated
# nbr_repet is the number of repetition to  
# reach the convergence of EM algorithm
# x es el código para las variables de los modelos
#seed es el numero random para las semillas. ej: 4345.
#Modo de calcular el mejor modelo.
#z_ # 
#2023-01-20
#https://github.com/QMUL/poLCAParallel/blob/master/exec/3_blrt.R
#0h s
seed<-2125
old <- Sys.time()

lca2 <- glca(f_preds, data = mydata_preds3, nclass = 2, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca3 <- glca(f_preds, data = mydata_preds3, nclass = 3, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca4 <- glca(f_preds, data = mydata_preds3, nclass = 4, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca5 <- glca(f_preds, data = mydata_preds3, nclass = 5, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca6 <- glca(f_preds, data = mydata_preds3, nclass = 6, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca7 <- glca(f_preds, data = mydata_preds3, nclass = 7, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca8 <- glca(f_preds, data = mydata_preds3, nclass = 8, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca9 <- glca(f_preds, data = mydata_preds3, nclass = 9, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)
lca10 <- glca(f_preds, data = mydata_preds3, nclass = 10, seed = seed, verbose = FALSE, n.init = 5e2, decreasing=T, maxiter = 1e4,testiter = 500)

new_med<-(Sys.time())
paste0("The model took ",round(new_med-old,2)," until every LCA was computed")
[1] "The model took 31.15 until every LCA was computed"

Time for this code chunk to run: 0.5 minutes

Luego calculamos la razón de verosimilitud mediante remuestreo bootstrap (BLRT) entre los distintos modelos con el que asume una clase menos.

Show code
gof<-
gofglca(lca2, lca3, lca4, lca5, lca6, lca7, lca8, lca9, lca10, test = "chisq")

bootlrt<-
gofglca(lca2, lca3, lca4, lca5, lca6, lca7, lca8, lca9, lca10, test = "boot", nboot=n_bootstrap, seed=2125)

Time for this code chunk to run: 0.1 minutes

Resultados

Hicimos un gráfico de los resultados

Show code
manualcolors <- c('indianred1', 'cornflowerblue', 'gray50', 'darkolivegreen4', 'slateblue2', 
                  'firebrick4', 'goldenrod4')
levels3 <- c("logLik", "Gsq", "AIC", "CAIC", "BIC", "entropy", "Res.Df")
labels3 <- c('Log-Verosimilitud', 'Chi2', 'Criterio de Información\nde Akaike(AIC)'','AIC Corregido'','Criterio de Información\nBayesiano (BIC)')''Entropía'­a'Grados de libertad residuales')s')
fig_lca_fit3<- cbind.data.frame(rn=2:10,gof$gtable) %>%
  data.frame() %>% 
  dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
  tidyr::pivot_longer(cols = -rn,names_to = "indices", values_to = "value", values_drop_na = F) %>%
  dplyr::mutate(indices = factor(indices, levels = levels3, labels = labels3)) %>%
  dplyr::filter(grepl("(AIC|BIC)",indices, ignore.case=T))%>%
  dplyr::mutate(ModelIndex= factor(rn, levels=2:10)) %>% 
  ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
  geom_line(size = 1.5) +
  scale_color_manual(values = manualcolors) +
  #scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  labs(x = "Número de clases"", y=="Valor"", color=="Medida"", linetype=="Medida"))++
  #facet_wrap(.~indices, scales = "free_y", nrow = 4, ncol = 1) +
  theme_bw()

fig_lca_fit3
Show code
ggsave("_fig1_comparison_glca_sin_po_ano.png",fig_lca_fit3, dpi=600)
#International Journal of Workplace Health Management  (Zhang et al., 2018).

Time for this code chunk to run: 0.1 minutes

Luego en una tabla

Show code
cbind.data.frame(rn=2:n_class_max,gof$gtable) %>%#
  dplyr::select(rn, everything()) %>% 
    dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
    knitr::kable(format="markdown", caption="Índices de ajuste modelos")
Table 1: Índices de ajuste modelos
rn logLik AIC CAIC BIC entropy Res.Df Gsq
2 -22732.73 45547.47 45844.30 45803.30 0.9879197 3747 2678.892
3 -22516.45 45156.91 45605.78 45543.78 0.9077552 3726 2246.333
4 -22341.50 44849.00 45449.91 45366.91 0.9039567 3705 1896.424
5 -22209.52 44627.03 45379.98 45275.98 0.9110267 3684 1632.457
6 -22153.43 44556.85 45461.84 45336.84 0.8308409 3663 1520.281
7 -22108.88 44509.77 45566.79 45420.79 0.7844379 3642 1431.193
8 -22074.93 44483.86 45692.91 45525.91 0.7893630 3621 1363.281
9 -22040.28 44456.57 45817.66 45629.66 0.7571282 3600 1293.993
10 -22008.16 44434.31 45947.44 45738.44 0.7562323 3579 1229.738
Show code
best_model<-
as.numeric(cbind.data.frame(rn=2:n_class_max,gof$gtable) %>% dplyr::summarise(which.min(BIC)+1))

Time for this code chunk to run: 0 minutes

Presentamos el modelo con mejor ajuste

Show code
summary(eval(parse(text = paste0("lca",best_model)))) #

Call:
glca(formula = f_preds, data = mydata_preds3, nclass = 5, n.init = 500, 
    decreasing = T, testiter = 500, maxiter = 10000, seed = seed, 
    verbose = FALSE)

Manifest items : CAUSAL EDAD_MUJER_REC PAIS_ORIGEN_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PREV_TRAMO_REC 

Categories for manifest items :
                        Y = 1 Y = 2 Y = 3 Y = 4 Y = 5 Y = 6
CAUSAL                      2     3     4                  
EDAD_MUJER_REC              1     2     3     4     5      
PAIS_ORIGEN_REC             1     2     3                  
HITO1_EDAD_GEST_SEM_REC     1     2     3     4            
MACROZONA                   1     2     3     4     5     6
PREV_TRAMO_REC              1     2     3     4     5      

Model : Latent class analysis 

Number of latent classes : 5 
Number of observations : 3789 
Number of parameters : 104 

log-likelihood : -22209.52 
     G-squared : 1632.457 
           AIC : 44627.03 
           BIC : 45275.98 

Marginal prevalences for latent classes :
Class 1 Class 2 Class 3 Class 4 Class 5 
0.13706 0.54102 0.12651 0.04249 0.15292 

Class prevalences by group :
    Class 1 Class 2 Class 3 Class 4 Class 5
ALL 0.13706 0.54102 0.12651 0.04249 0.15292

Item-response probabilities :
CAUSAL 
         Y = 1  Y = 2  Y = 3
Class 1 0.4647 0.5353 0.0000
Class 2 0.4038 0.5962 0.0000
Class 3 0.1928 0.8072 0.0000
Class 4 0.0233 0.0000 0.9767
Class 5 0.0098 0.0000 0.9902
EDAD_MUJER_REC 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5
Class 1 0.0043 0.0085 0.1703 0.5025 0.3144
Class 2 0.0072 0.0180 0.2106 0.4571 0.3071
Class 3 0.0000 0.0000 0.0405 0.4255 0.5340
Class 4 0.0000 0.1649 0.2335 0.4515 0.1501
Class 5 0.0017 0.3472 0.2463 0.2958 0.1089
PAIS_ORIGEN_REC 
         Y = 1  Y = 2  Y = 3
Class 1 0.0339 0.0431 0.9230
Class 2 0.0002 0.9998 0.0000
Class 3 0.0000 0.9252 0.0748
Class 4 0.0000 0.0000 1.0000
Class 5 0.0000 0.9935 0.0065
HITO1_EDAD_GEST_SEM_REC 
         Y = 1  Y = 2  Y = 3  Y = 4
Class 1 0.0230 0.1163 0.7339 0.1268
Class 2 0.0291 0.1767 0.6594 0.1348
Class 3 0.0150 0.3653 0.5698 0.0499
Class 4 0.0191 0.9809 0.0000 0.0000
Class 5 0.0089 0.9877 0.0035 0.0000
MACROZONA 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5  Y = 6
Class 1 0.0059 0.5683 0.1051 0.0594 0.2083 0.0530
Class 2 0.0022 0.3208 0.1932 0.2004 0.0935 0.1899
Class 3 0.0000 0.7147 0.0922 0.0556 0.0698 0.0677
Class 4 0.0062 0.5563 0.0870 0.0166 0.2960 0.0379
Class 5 0.0042 0.3849 0.1747 0.1452 0.0862 0.2048
PREV_TRAMO_REC 
         Y = 1  Y = 2  Y = 3  Y = 4  Y = 5
Class 1 0.0123 0.0139 0.6086 0.2871 0.0780
Class 2 0.0012 0.0251 0.6227 0.3474 0.0036
Class 3 0.0000 0.8096 0.0000 0.1839 0.0065
Class 4 0.0253 0.0062 0.5225 0.1669 0.2791
Class 5 0.0017 0.0695 0.7236 0.1998 0.0053

Time for this code chunk to run: 0 minutes

Show code
save.image("data2_lca215_sin_po_alt_ano.RData")

Time for this code chunk to run: 0 minutes

Show code
require(tidyverse)
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))

Time for this code chunk to run: 0.3 minutes