Paso 2.14

Análisis de clases latentes exploratoria y comparativa con predictores, sin pueblo originario, sin año y recategorización de edad mujer y semana gestacional hito 1

Andrés González Santa Cruz
May 13, 2023
Show code
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
Show code
 // Image-zoom overlay for the knitted HTML page: clicking any plot <img>
 // shows an enlarged copy in a fixed, centred <div>; clicking the enlarged
 // copy collapses the overlay again. Depends on jQuery being loaded first.
 $(document).ready(function() {
    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
    // onClick function for all plots (img's): copy the clicked image's src
    // into the overlay image and style the overlay as a centred lightbox
    $('img:not(.zoomImg)').click(function() {
      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
      $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
    });
    // onClick function for zoomImg: hide the overlay (opacity 0, zero width)
    $('img.zoomImg').click(function() {
      $('.zoomDiv').css({opacity: '0', width: '0%'}); 
    });
  });
  
Show code
<script src="hideOutput.js"></script> 
Show code
// Collapsible chunks for the knitted HTML page: every element with class
// "fold" gets "Show Source" / "Show Output" / "Show Plot" toggle buttons
// prepended to its <pre> children; clicked buttons slide the sibling
// code/img open or closed and flip their Show/Hide label.
$(document).ready(function() {    
    $chunks = $('.fold');    
    $chunks.each(function () {      // add button to source code chunks     
    if ( $(this).hasClass('s') ) {       
        $('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
            $('pre.r', this).children('code').attr('class', 'folded');     
            }      // add button to output chunks     
        if ( $(this).hasClass('o') ) {       
            $('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");       
            $('pre:not(.r)', this).children('code:not(r)').addClass('folded');        // add button to plots       
            $(this).find('img').wrap('<pre class=\"plot\"></pre>');       
            $('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");       
            $('pre.plot', this).children('img').addClass('folded');      
            }   
});    // hide all chunks when document is loaded   
    $('.folded').css('display', 'none')    // function to toggle the visibility   
    $('.showopt').click(function() {     
            var label = $(this).html();     
            if (label.indexOf("Show") >= 0) {       
                $(this).html(label.replace("Show", "Hide"));     
            } else {
              $(this).html(label.replace("Hide", "Show"));     
            }     
    $(this).siblings('code, img').slideToggle('fast', 'swing');   
    }); 
}); 

Cargamos los datos

Show code
# NOTE(review): rm(list = ls()) wipes the whole workspace and gc() forces a
# garbage collection. This is tolerable at the top of a knitr chunk run in a
# fresh session, but do not source this file into an interactive session.
rm(list = ls());gc()
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  653002 34.9    1307962 69.9   924086 49.4
Vcells 1172614  9.0    8388608 64.0  2056718 15.7
Show code
[1] "2023-05-13 18:42:38 -04"
Show code
# Load the pre-built workspace from step 2.13 (mydata_preds3 and friends);
# the .RData file must sit in the working directory of the knit.
load("data2_lca2_sin_po_ano_2023_05_13.RData")

Cargamos los paquetes

Show code
knitr::opts_chunk$set(echo = TRUE)
pacman::p_unlock()

# Install-and-attach helper.
# FIX: the original `if(!require(x)){install.packages(x)}` pattern installs a
# missing package but leaves it unattached until the *next* run; here we attach
# right after installing. Also removes a duplicated `effects` line.
ensure_pkg <- function(pkg) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}

# Attachment order is preserved from the original chunk (it affects masking).
for (pkg in c("distill", "poLCA", "devtools")) ensure_pkg(pkg)
# poLCAParallel comes from GitHub and needs devtools (attached just above)
if (!require(poLCAParallel)) {
  devtools::install_github("QMUL/poLCAParallel@package")
}
for (pkg in c("compareGroups", "parallel", "Hmisc", "tidyverse")) ensure_pkg(pkg)
try(ensure_pkg("sjPlot"))  # sjPlot install may fail; keep going either way
for (pkg in c(
  "emmeans", "nnet", "here", "doParallel", "progress", "caret",
  "rpart", "rpart.plot", "partykit", "randomForest", "ggcorrplot",
  "polycor", "tableone", "broom", "plotly", "rsvg", "janitor", "job",
  "missForest", "DT", "DiagrammeR", "broom.mixed", "corrplot", "dlookr",
  "effects", "missRanger", "fidelius", "finalfit", "glca", "rio",
  "DiagrammeRsvg"
)) ensure_pkg(pkg)
#if(!require(lcra)){install.packages("lcra")} #jags problem

#if(!require(poLCA)){githubinstall::gh_install_packages("poLCA", ref = github_pull("14"))}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

Definimos ciertas constantes

Show code
# Tuning constants for the LCA runs below.
clus_iter <- 500  # number of random restarts per model #500
# FIX: parallel::detectCores() can return NA on some platforms; fall back to 1
n_thread <- parallel::detectCores()
if (is.na(n_thread)) n_thread <- 1L
nrep <- clus_iter # number of different initial values (could be n_thread too)
n_class_max <- 10 # maximum number of classes to investigate
n_bootstrap <- 1#100 #30 # 50 number of bootstrap samples
print(n_thread)
[1] 8

Análisis de clases latentes

Show code
library(DiagrammeR) #⋉

# Path diagram for the adjusted LCA: the latent-class node points to the six
# manifest indicators, and separately to the distal outcome (pregnancy
# interruption). Exported as PDF, PNG and a standalone HTML widget.
# NOTE(review): the label 'Interupción' (sic) is kept byte-for-byte so the
# rendered figure does not change; fix the spelling deliberately if desired.
gr_lca2<-
DiagrammeR::grViz("
digraph flowchart {
    fontname='Comic Sans MS'

  # Nodes
  subgraph samelevel {
    CAUSAL [label = 'Causal',fontsize=10,shape = box]
    MUJER_REC [label = 'Sexo\n(mujer)',fontsize=10,shape = box]
    EDAD_MUJER_REC [label = 'Edad\nmujer',fontsize=10,shape = box]
    HITO1_EDAD_GEST_SEM_REC [label = 'Edad\nGestacional\nHito 1',fontsize=10,shape = box]
    MACROZONA [label = 'Macrozona',fontsize=10,shape = box]
    PAIS_ORIGEN_REC [label = 'País de\norigen',fontsize=10,shape = box]
    PREV_TRAMO_REC [label = 'Previsión y\ntramo',fontsize=10,shape = box]
    
  {rank=same; rankdir= 'TB'; CAUSAL MUJER_REC EDAD_MUJER_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PAIS_ORIGEN_REC PREV_TRAMO_REC }
  }
  LCA [label= 'Clases\nlatentes', shape= circle, style=filled, color=lightgrey, fontsize=10]
  
  inter [label = 'Interupción\nembarazo',fontsize=10,shape = box, height=.00002] # set the position of the inter node pos='15,100'

  # Nodes
  subgraph {
   LCA ->  {CAUSAL MUJER_REC EDAD_MUJER_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PAIS_ORIGEN_REC PREV_TRAMO_REC } [rank=same; rankdir= 'TB'] 
}
  subgraph {
   LCA -> inter [minlen=14] #minlen es necesario para correr arrowhead = none; 
  {rank=same; LCA inter [rankdir='LR']}; #; 
}
}")#, width = 1200, height = 900
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
# Export geometry: 21 x 8 cm at 1200 DPI
DPI = 1200
WidthCM = 21
HeightCM = 8

gr_lca2 %>%
  export_svg %>% charToRaw %>% rsvg_pdf("_flowchart_lca_adj_sin_po_ano.pdf")

gr_lca2 %>% export_svg()%>%charToRaw %>% rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("_flowchart_lca0_adj_sin_po.png")

htmlwidgets::saveWidget(gr_lca2, "_flowchart_lca_adj_sin_po_ano.html")
# FIX: webshot previously pointed at "_flowchart_lca_adj_ano.html", a file
# that is never created (saveWidget writes "..._sin_po_ano.html" above), so
# the screenshot silently failed.
webshot::webshot("_flowchart_lca_adj_sin_po_ano.html", "_flowchart_lca_adj_sin_po_ano.png",vwidth = 1200, vheight = 900, zoom = 2)
NULL

Modelo alternativo

Análisis ACL alt

Show code
#Biemer, P. P., & Wiesen, C. (2002). Measurement error evaluation of self-reported drug use: a latent class analysis of the US National Household Survey on Drug Abuse. Journal of the Royal Statistical Society: Series A (Statistics in Society), 165(1), 97–119. doi:10.1111/1467-985x.00612  
#lca_entropia(x="ppio", seed= 2125, k= 8, f= f_preds, dat= mydata_preds, nbr_repet= 30, na_rm= T)
#3
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
# f is the selected variables
# dat is the data
# nb_var is the number of selected variables
# k is the number of latent class generated
# nbr_repet is the number of repetition to  
# reach the convergence of EM algorithm
# x es el código para las variables de los modelos
#seed es el numero random para las semillas. ej: 4345.
#Modo de calcular el mejor modelo.
#z_ # 
#2023-01-20
#https://github.com/QMUL/poLCAParallel/blob/master/exec/3_blrt.R
#0h s
# Model formula: six manifest indicators measured by the latent classes, with
# `outcome` as the single concomitant (class-membership) covariate.
f_adj<-cbind(CAUSAL, EDAD_MUJER_REC, PAIS_ORIGEN_REC, HITO1_EDAD_GEST_SEM_REC, MACROZONA, PREV_TRAMO_REC)~ outcome

seed<-2125
old <- Sys.time()

require(progress)

set.seed(seed)
model_array_adj <- list()  # fitted models, indexed by number of classes k
pb <- progress_bar$new(total = n_class_max, message_class = "Running poLCA")

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# Fit a k-class model for k = 1..n_class_max, retrying with progressively
# fewer random restarts (nrep) if an estimation attempt errors out.
# FIX: the original halved `nrep_int` inside the tryCatch *error handler*;
# that assignment lives in the handler's own environment, so the loop
# variable never changed and a persistent error meant an infinite loop.
for (k in seq_len(n_class_max)) {
  nrep_int <- nrep  # start from the full number of restarts
  repeat {
    mod <- tryCatch({
      set.seed(seed)
      poLCAParallel::poLCA(
        f_adj, 
        mydata_preds3 %>% dplyr::mutate(outcome=ifelse(outcome==1,1,0)), # %>% janitor::tabyl(outcome)
        nclass = k, 
        nrep = nrep_int, 
        maxiter = 1e4,
        n.thread = n_thread,  # FIX: was hard-coded 12; use the detected core count
        verbose = FALSE
      )
    }, error = function(e) {
      message(paste("Error in poLCA for k =", k, ", nrep =", nrep_int, ":", conditionMessage(e)))
      NULL  # signal failure to the retry logic below
    })
    if (!is.null(mod)) {
      model_array_adj[[k]] <- mod  # store the result and stop retrying
      break
    }
    if (nrep_int <= 1) break  # give up on this k once the smallest nrep fails
    nrep_int <- nrep_int / 2  # reduce nrep by half and try again
  }
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
  pb$tick()  # Increment the progress bar
  cat(sprintf("\r%d%% completed", round(k/n_class_max*100)))  # Print progress percentage
  Sys.sleep(.05)
}

 ALERT: covariates not allowed when nclass=1;
                 will be ignored. 
 

10% completed
20% completed
30% completed
40% completed
50% completed
60% completed
70% completed
80% completed
90% completed
100% completed
Show code
# Tear down the progress bar created for the estimation loop above.
pb$terminate()  # Close the progress bar
cat(': Done')  # Print "Done" message  
: Done
Show code
# Keep a snapshot of the fitted models before the bootstrap section reuses them.
model_array_adj_ppio<-model_array_adj

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#BOOTSTRAP#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

new_med<-(Sys.time())
# FIX: `new_med - old` is a difftime whose units are chosen automatically
# (secs vs mins), so the original message printed a bare number with no units.
elapsed_lca <- new_med - old
paste0("The model took ", round(as.numeric(elapsed_lca), 2), " ",
       units(elapsed_lca), " until every LCA was computed")
[1] "The model took 17.53 until every LCA was computed"

Luego calculamos la razón de verosimilitud mediante remuestreo bootstrap (BLRT), comparando cada modelo con el modelo que asume una clase menos.

Show code
# Bootstrap likelihood-ratio tests (BLRT): compare each k-class model against
# the (k-1)-class model. Element 1 of p_value_array_adj stays 0 because "zero
# classes" is not a valid null model.
# FIX: preallocate the p-value vector (the original grew it with c() inside
# the loop); the final contents are identical.
p_value_array_adj <- numeric(n_class_max)
# for all number of classes investigated:
#   - store the log likelihood ratio
#   - store all bootstrap samples log likelihoods ratios
fitted_log_ratio_array_adj <- rep(NaN, n_class_max)
bootstrap_log_ratio_array_adj <- list()

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# do the bootstrap likelihood ratio test for each number of classes
for (nclass in 2:n_class_max) {

  # null and alternative models differ by exactly one class
  null_model_adj <- model_array_adj_ppio[[nclass - 1]]
  alt_model_adj <- model_array_adj_ppio[[nclass]]

  # n_bootstrap resamples, fitted with nrep restarts on n_thread threads
  bootstrap_results_adj <- poLCAParallel::blrt(
    null_model_adj, alt_model_adj,
    n_bootstrap, n_thread, nrep
  )

  # log likelihood ratio to compare the two models
  fitted_log_ratio_array_adj[nclass] <- bootstrap_results_adj[["fitted_log_ratio"]]
  # log likelihood ratios for all bootstrap samples
  bootstrap_log_ratio_array_adj[[nclass]] <-
    bootstrap_results_adj[["bootstrap_log_ratio"]]
  # p value for this nclass
  p_value_array_adj[nclass] <- bootstrap_results_adj[["p_value"]]
  
  # progress report; '\014' is a form feed that clears some consoles
  cat(paste0(round(nclass / n_class_max * 100), '% completed'))
  Sys.sleep(.05)
  if (nclass == n_class_max) cat(': Done')
  else cat('\014')
}
20% completed30% completed40% completed50% completed60% completed70% completed80% completed90% completed100% completed: Done
Show code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
new<-(Sys.time())
# FIX: the original computed (Sys.time() - old)/60, which divides a difftime
# whose units were already auto-chosen (e.g. mins) by 60 — a wrong value with
# a wrong label — and the message hard-coded "minutes" regardless of the
# actual units. Request minutes explicitly instead.
time_diff <- difftime(new, old, units = "mins")
paste0("The model took ", round(as.numeric(time_diff), 2), " minutes")
[1] "The model took 1.33 minutes"
Show code
# Snapshot this run's results under `_ppio` names for downstream chunks.
model_array_ppio2 <- model_array_adj
fitted_log_ratio_array_adj_ppio <- fitted_log_ratio_array_adj
bootstrap_log_ratio_array_adj_ppio <- bootstrap_log_ratio_array_adj
# NOTE(review): this keeps only the *last* loop iteration's blrt result
bootstrap_results_adj_ppio <- bootstrap_results_adj
p_value_array_adj_ppio <- p_value_array_adj

# BIC for every fitted model.
# vapply() guarantees a numeric vector (sapply() would return list() on an
# empty model list).
bic_values_adj <- vapply(model_array_ppio2, function(model) model$bic, numeric(1))

# Index of the model with the lowest BIC
best_model_index_adj <- which.min(bic_values_adj)

# Select the best model
LCA_best_model_adj_ppio <- model_array_ppio2[[best_model_index_adj]]
#####################################################################################################################################################################
#Within poLCA, parameter estimates are obtained by a procedure that repeatedly improves estimates.
#This is stopped when no further improvements are obtained, or until a maximum number of iterations is reached. The starting values are the values at which such repetitions were started. Increasing the number 4 R. ACHTERHOF ET AL.of iterations (cycles within each estimation) and setting more different starting values for each repetition results in a greater likelihood that the global (rather than local) maximum of the log-likelihood function (and thus, the best possible solution) is reached. The maximum number of iterations was chosen as 10.000, and 500 different sets of starting values were used (thus going beyond the recommendations by Linzer & Lewis, 2011; Oberski, 2016). As such, the influence of chance was minimized while the reproducibility of the results was maximized# 

Resultados ACL alt

Hicimos un gráfico de los resultados

Show code
# Build a per-model table of fit measures (information criteria, entropy,
# deviance change, BLRT) and plot the information criteria by class count.
# FIXES vs. the original chunk:
#   * `labels2` and the `labs()` call were syntactically garbled; reconstructed.
#   * the plot referenced `manualcolors` (undefined here) instead of the
#     `manualcolors2` palette defined below.
tab_ppio2 <- data.frame()  # one row appended per model below

# Loop from k = 2: the 1-class model has no previous model to compare against.
for (i in 2:n_class_max) {
  skip_to_next <- FALSE

  # Current model and the model with one class fewer (for change statistics)
  mod2 <- model_array_ppio2[[i]]
  mod2_min1 <- model_array_ppio2[[(i-1)]]

  # Skip models without valid class predictions (estimation failed)
  if (is.null(mod2$predclass)) {
    skip_to_next <- TRUE
  }

  # If the model has valid predictions, calculate the measures and append them
  if (!skip_to_next) {
    # Maximum number of response categories over the manifest variables
    # (NOTE(review): the original comment called this "number of latent
    # classes"; it is computed from the observed indicators — confirm intent)
    mod2$C <- max(t(matrix(apply(mod2$y, 2, max))))
    # Number of manifest variables
    mod2$J <- ncol(mod2$y)
    # Total number of items
    mod2$I <- mod2$J * mod2$C
    # Degrees of freedom
    mod2$df <- mod2$C^mod2$I - mod2$npar - 1
    # Chi-square goodness-of-fit p-value
    mod2$Chisq.pvalue <- (1 - pchisq(mod2$Chisq, mod2$df))
    # AIC / BIC (rounded for display)
    mod2$aic <- round(mod2$aic, 2)
    mod2$bic <- round(mod2$bic, 2)
    # Adjusted BIC n*=(n+2)/24 (https://github.com/dlinzer/poLCA/issues/10)
    mod2$aBIC <- round((-2 * mod2$llik) + (log((mod2$N+2)/24) * mod2$npar), 2) 
    # Conditional AIC
    mod2$cAIC <-  round((-2 * mod2$llik) + (2 * mod2$Nobs * log(mod2$N/mod2$Nobs)), 2)
    # approximate weight of evidence criterion  #https://jbds.isdsa.org/public/journals/1/html/v2n2/qiu/
    mod2$awe <-  round((-2 * mod2$llik) + (2 * mod2$npar * log(mod2$Nobs)+1.5), 2)    
    # Likelihood ratio / deviance (Gsq) p-value
    mod2$Gsq.pvalue <- (1 - pchisq(mod2$Gsq, mod2$df))
    # Relative entropy and entropy R2 (helper functions defined elsewhere)
    mod2$RelEnt <- round(relative.entropy(mod2), 2)
    mod2$EntR2 <- round(entropy.R2(mod2), 2)
    # Deviance change vs. the (i-1)-class model, with df change and p-value
    mod2$DevChange <- round(mod2_min1$Gsq - mod2$Gsq, 2)
    mod2$dfChange <- mod2_min1$resid.df - mod2$resid.df
    mod2$pvalDevChange <- round(pchisq(mod2$DevChange, mod2$dfChange, lower.tail = FALSE), 4)
    # Bootstrap likelihood-ratio test, computed in the previous chunk
    mod2$BLRT <- round(fitted_log_ratio_array_adj_ppio[[i]],2)
    mod2$BLRT.pvalue <- p_value_array_adj_ppio[[i]]
    # Add the model index to the data frame
    mod2$ModelIndex <- i

    tab_ppio2 <- rbind.data.frame(tab_ppio2, t(data.matrix(mod2[c("llik", "Chisq", "Chisq.pvalue", "resid.df", "aic", "bic", "aBIC", "cAIC", "awe", "Gsq", "Gsq.pvalue", "RelEnt", "EntR2", "DevChange", "dfChange", "pvalDevChange", "ModelIndex","BLRT", "BLRT.pvalue")])))
  }
}

# identify the list-like columns and flatten them back to atomic vectors
list_cols2 <- vapply(tab_ppio2, is.list, logical(1))
unlisted_cols2 <- lapply(tab_ppio2[list_cols2], unlist)
tab_ppio2 <- cbind(tab_ppio2[!list_cols2], do.call(cbind, unlisted_cols2))
# Erase rownames
rownames(tab_ppio2) <- NULL

manualcolors2 <- c('indianred1', 'cornflowerblue', 'gray50', 'darkolivegreen4', 'slateblue2', 
                  'firebrick4', 'goldenrod4')
levels2 <- c("llik", "Chisq", "Chisq.pvalue", "resid.df", "aic", "bic", "aBIC", "cAIC", "awe",
            "Gsq", "Gsq.pvalue", "RelEnt", "EntR2", "DevChange", "dfChange",
            "pvalDevChange", "BLRT", "BLRT.pvalue")
# Spanish display labels, one per entry of levels2 (same order)
labels2 <- c('Log-Verosimilitud', 'Chi2', 'valor p Chi2', 'Grados de libertad',
            'Criterio de Información\nde Akaike (AIC)', 'Criterio de Información\nBayesiano (BIC)',
            'BIC Ajustado (SABIC)', 'AIC Corregido', 'Peso de evidencia aproximado (awe)',
            'G-squared/Deviance', 'Valor p G-squared', 'Entropía Relativa', 'Entropía R2',
            'Cambio en Deviance\n(con modelo previo)', 'Grados de libertad del cambio',
            'valor p cambio deviance', 'BLRT', 'valor p BLRT')
fig_lca_fit2<- tab_ppio2 %>%
  dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
  tidyr::pivot_longer(cols = -ModelIndex, # everyone but the index
                       names_to = "indices", values_to = "value", values_drop_na = F) %>%
  dplyr::mutate(indices = factor(indices, levels = levels2, labels = labels2)) %>%
  # keep only the information criteria (AIC/BIC family plus awe)
  dplyr::filter(grepl("(AIC|BIC|awe)",indices, ignore.case=T))%>%
  dplyr::mutate(ModelIndex= factor(ModelIndex, levels=2:n_class_max)) %>% 
  ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
  geom_line(size = 1.5) +
  scale_color_manual(values = manualcolors2) +  # FIX: was undefined `manualcolors`
  labs(x = "Número de clases", y = "Valor", color = "Medida", linetype = "Medida") +
  theme_bw()

fig_lca_fit2
Show code
# Save the fit-comparison figure at print resolution (600 dpi).
ggsave("_fig2_comparison_adj_sin_po_ano.png",fig_lca_fit2, dpi=600)

Luego en una tabla

Show code
# Render the fit-measure table as markdown, with ModelIndex as the first
# column and character columns coerced back to numeric for display.
tab_ppio2 %>%
  dplyr::relocate(ModelIndex) %>%
  dplyr::mutate(dplyr::across(dplyr::where(is.character), as.numeric)) %>%
  knitr::kable(format = "markdown", caption = "Fit measures of models")
Table 1: Fit measures of models
ModelIndex llik Chisq Chisq.pvalue resid.df aic bic aBIC cAIC awe Gsq Gsq.pvalue RelEnt EntR2 DevChange dfChange pvalDevChange BLRT BLRT.pvalue
2 -22706.84 5498612 1 3745 45501.68 45776.24 45636.43 45413.68 46140.29 2678.949 1 0.99 0.99 2662.77 23 0 2714.61 0
3 -22464.96 2921247 1 3722 45063.91 45481.98 45269.09 44929.91 46035.55 2249.400 1 0.89 0.86 429.55 23 0 483.77 0
4 -22282.17 5028935 1 3699 44744.34 45305.93 45019.95 44564.34 46049.01 1899.609 1 0.91 0.89 349.79 23 0 365.57 0
5 -22149.63 4906520 1 3676 44525.27 45230.37 44871.31 44299.27 46162.98 1635.479 1 0.92 0.90 264.13 23 0 265.07 0
6 -22067.42 3856435 1 3653 44406.84 45255.46 44823.32 44134.84 46377.58 1563.260 1 0.86 0.83 72.22 23 0 164.43 0
7 -22068.78 1441782 1 3630 44455.57 45447.71 44942.48 44137.57 46759.34 1562.883 1 0.88 0.84 0.38 23 1 -2.73 1
8 -22098.79 4935727 1 3607 44561.59 45697.24 45118.93 44197.59 47198.40 1617.322 1 0.83 0.78 -54.44 23 1 -60.02 1
9 -22296.32 2212907 1 3584 45002.63 46281.80 45630.41 44592.63 47972.47 1931.952 1 0.85 0.75 -314.63 23 1 -395.04 1
10 -22395.34 3303083 1 3561 45246.68 46669.37 45944.89 44790.68 48549.56 2227.246 1 0.95 0.91 -295.29 23 1 -198.05 1

Presentamos el modelo con mejor ajuste

Show code
# Print the BIC-best model selected above: conditional response probabilities,
# class shares, and the covariate (outcome) coefficients per class contrast.
print(LCA_best_model_adj_ppio) #
Conditional item response (column) probabilities,
 by outcome variable, for each class (row) 
 
$CAUSAL
          Pr(1)  Pr(2)  Pr(3)  Pr(4)
class 1:      0 0.2020 0.7980 0.0000
class 2:      0 0.0049 0.0000 0.9951
class 3:      0 0.4068 0.5932 0.0000
class 4:      0 0.4626 0.5374 0.0000
class 5:      0 0.0245 0.0000 0.9755

$EDAD_MUJER_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
class 1:  0.0000 0.0000 0.0392 0.4345 0.5262
class 2:  0.0017 0.3482 0.2460 0.2951 0.1089
class 3:  0.0082 0.0181 0.2114 0.4563 0.3060
class 4:  0.0000 0.0089 0.1746 0.5022 0.3143
class 5:  0.0000 0.1647 0.2332 0.4514 0.1507

$PAIS_ORIGEN_REC
           Pr(1)  Pr(2)  Pr(3)
class 1:  0.0000 0.9070 0.0930
class 2:  0.0000 0.9932 0.0068
class 3:  0.0059 0.9941 0.0000
class 4:  0.0121 0.0000 0.9879
class 5:  0.0000 0.0000 1.0000

$HITO1_EDAD_GEST_SEM_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)
class 1:  0.0181 0.3704 0.5720 0.0396
class 2:  0.0088 0.9877 0.0035 0.0000
class 3:  0.0290 0.1733 0.6603 0.1374
class 4:  0.0204 0.1142 0.7360 0.1294
class 5:  0.0192 0.9808 0.0000 0.0000

$MACROZONA
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)
class 1:  0.0000 0.7205 0.0937 0.0498 0.0700 0.0660
class 2:  0.0038 0.3850 0.1751 0.1452 0.0862 0.2047
class 3:  0.0038 0.3205 0.1918 0.1998 0.0949 0.1893
class 4:  0.0000 0.5661 0.1060 0.0623 0.2147 0.0510
class 5:  0.0062 0.5564 0.0869 0.0165 0.2956 0.0384

$PREV_TRAMO_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
class 1:  0.0000 0.7589 0.0040 0.2281 0.0091
class 2:  0.0018 0.0696 0.7237 0.1997 0.0053
class 3:  0.0020 0.0305 0.6254 0.3383 0.0037
class 4:  0.0098 0.0007 0.6260 0.2819 0.0817
class 5:  0.0256 0.0061 0.5214 0.1672 0.2796

Estimated class population shares 
 0.1333 0.1522 0.5469 0.125 0.0425 
 
Predicted class memberships (by modal posterior prob.) 
 0.1196 0.1507 0.5585 0.1288 0.0425 
 
========================================================= 
Fit for 5 latent classes: 
========================================================= 
2 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)     0.66251     0.37710    1.757     0.079
outcome        -0.56627     0.34644   -1.635     0.102
========================================================= 
3 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)     2.91339     0.35778    8.143         0
outcome        -1.69801     0.31475   -5.395         0
========================================================= 
4 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)     1.07397     0.37182    2.888     0.004
outcome        -1.25267     0.33553   -3.733     0.000
========================================================= 
5 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)    -0.91466     0.49563   -1.845     0.065
outcome        -0.24149     0.47695   -0.506     0.613
========================================================= 
number of observations: 3789 
number of estimated parameters: 113 
residual degrees of freedom: 3676 
maximum log-likelihood: -22149.63 
 
AIC(5): 44525.27
BIC(5): 45230.37
X^2(5): 4906520 (Chi-square goodness of fit) 
 
ALERT: estimation algorithm automatically restarted with new initial values 
 
Show code
[1] "2023-05-13 20:05:16 -04"
Show code
# Persist the full workspace (models, BLRT results, table) for the next step.
save.image("data2_lca2_adj_sin_po_ano.RData")
Show code
require(tidyverse)
# Reproducibility appendix: list every loaded package with its version and
# install source, rendered as a searchable DT table.
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  # FIX: the headers previously read 'Variable'/'Percentage' (copied from a
  # different table); the columns are actually package, version and source.
  DT::datatable(filter = 'top',
                colnames = c('Row number' = 1, 'Package' = 2, 'Version' = 3, 'Source' = 4),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))