8 min read

Análisis de clases latentes II

library(foreign)
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.6
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(knitr)
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
library(poLCA)
## Loading required package: scatterplot3d
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(parallel)
library(broom)

migracion <- read.spss("http://www.losmexicanos.unam.mx/migracion/encuesta_nacional/base_datos/Encuesta_Nacional_de_Migracion.sav", 
                       to.data.frame = TRUE)
## re-encoding from CP1252
diccionario <- tibble(nombre = names(migracion), 
                    etiquetas = str_trim(attr(migracion, "variable.labels")))
migracion <- as.tibble(migracion)

f <- cbind(p7_1,p7_2,p7_3,p7_4,p7_5,p7_6,p7_7,p7_8,p7_9,p7_10)~1

set.seed(2018)
migracion %>% 
  dplyr::select(starts_with("p7_")) %>%  
  poLCA(f, ., nclass = 5, nrep = 4) -> LCA_3clases
## Model 1: llik = -8908.353 ... best llik = -8908.353
## Model 2: llik = -8890.167 ... best llik = -8890.167
## Model 3: llik = -8906.306 ... best llik = -8890.167
## Model 4: llik = -8911.801 ... best llik = -8890.167
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $p7_1
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.7245 0.1585 0.1080 0.0090 0.0000
## class 2:  0.1606 0.7758 0.0636 0.0000 0.0000
## class 3:  0.2630 0.1514 0.1131 0.4173 0.0552
## class 4:  0.1981 0.2488 0.5442 0.0089 0.0000
## class 5:  0.9364 0.0148 0.0464 0.0024 0.0000
## 
## $p7_2
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.7392 0.1965 0.0577 0.0065 0.0000
## class 2:  0.1394 0.8237 0.0369 0.0000 0.0000
## class 3:  0.1212 0.1704 0.0596 0.5659 0.0828
## class 4:  0.0932 0.3050 0.5929 0.0090 0.0000
## class 5:  0.9819 0.0047 0.0063 0.0072 0.0000
## 
## $p7_3
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.7712 0.1834 0.0454 0.0000 0.0000
## class 2:  0.1225 0.8351 0.0423 0.0000 0.0000
## class 3:  0.2219 0.1801 0.0182 0.5247 0.0552
## class 4:  0.1555 0.3646 0.4799 0.0000 0.0000
## class 5:  0.9815 0.0167 0.0018 0.0000 0.0000
## 
## $p7_4
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.2819 0.3845 0.3295 0.0041 0.0000
## class 2:  0.1044 0.6639 0.1923 0.0337 0.0056
## class 3:  0.0281 0.0837 0.1697 0.6079 0.1106
## class 4:  0.0000 0.1205 0.8724 0.0071 0.0000
## class 5:  0.9477 0.0224 0.0127 0.0172 0.0000
## 
## $p7_5
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.6061 0.2490 0.1329 0.0032 0.0088
## class 2:  0.0799 0.8443 0.0532 0.0226 0.0000
## class 3:  0.0957 0.2769 0.0947 0.4601 0.0726
## class 4:  0.0675 0.3051 0.6199 0.0075 0.0000
## class 5:  0.9781 0.0103 0.0092 0.0024 0.0000
## 
## $p7_6
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.2422 0.3519 0.3823 0.0236 0.0000
## class 2:  0.0394 0.6912 0.1821 0.0873 0.0000
## class 3:  0.0000 0.0828 0.2267 0.5798 0.1107
## class 4:  0.0410 0.1471 0.8119 0.0000 0.0000
## class 5:  0.9061 0.0499 0.0194 0.0223 0.0024
## 
## $p7_7
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.7386 0.2110 0.0504 0.0000 0.0000
## class 2:  0.1245 0.8125 0.0630 0.0000 0.0000
## class 3:  0.3459 0.1155 0.1106 0.3452 0.0828
## class 4:  0.2806 0.3162 0.3943 0.0089 0.0000
## class 5:  0.9906 0.0046 0.0000 0.0048 0.0000
## 
## $p7_8
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.6770 0.2684 0.0515 0.0031 0.0000
## class 2:  0.1205 0.8681 0.0000 0.0114 0.0000
## class 3:  0.1192 0.1849 0.1384 0.4747 0.0828
## class 4:  0.0362 0.3172 0.6392 0.0074 0.0000
## class 5:  0.9880 0.0120 0.0000 0.0000 0.0000
## 
## $p7_9
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.7065 0.2484 0.0418 0.0033 0.0000
## class 2:  0.0897 0.9103 0.0000 0.0000 0.0000
## class 3:  0.2183 0.0912 0.0693 0.5384 0.0828
## class 4:  0.0637 0.3776 0.5587 0.0000 0.0000
## class 5:  0.9975 0.0025 0.0000 0.0000 0.0000
## 
## $p7_10
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
## class 1:  0.3122 0.2894 0.3984 0.0000 0.0000
## class 2:  0.0599 0.7105 0.1729 0.0453 0.0113
## class 3:  0.0143 0.0278 0.1880 0.6732 0.0967
## class 4:  0.0177 0.1174 0.8649 0.0000 0.0000
## class 5:  0.9730 0.0082 0.0063 0.0102 0.0024
## 
## Estimated class population shares 
##  0.259 0.1473 0.0604 0.1866 0.3467 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.2585 0.1501 0.0601 0.1852 0.3461 
##  
## ========================================================= 
## Fit for 5 latent classes: 
## ========================================================= 
## number of observations: 1199 
## number of estimated parameters: 204 
## residual degrees of freedom: 995 
## maximum log-likelihood: -8890.167 
##  
## AIC(5): 18188.33
## BIC(5): 19226.54
## G^2(5): 6066.052 (Likelihood ratio/deviance statistic) 
## X^2(5): 17940695627 (Chi-square goodness of fit) 
## 
etiquetas_item <- tibble(outcome = 1:length(levels(migracion$p7_1)), 
                         etiqueta_item = levels(migracion$p7_1)
)

LCA_3clases %>% 
  tidy %>% 
  left_join(dplyr::rename(diccionario, variable = nombre)) %>% 
  left_join(etiquetas_item) %>% 
  mutate(etiquetas = str_remove(etiquetas, 
                                "7 ¿Estaría dispuesto o no estaría dispuesto a permitir que en su casa vivieran personas "), 
         etiquetas = str_wrap(etiquetas, 30),
         etiqueta_item = factor(etiqueta_item, 
                                levels = c("No", "Sí, en parte", "Sí", "NS", "NC")), 
         class = paste("Clase", class)) %>%
  ggplot(aes(x = etiquetas, fill = etiqueta_item, y = estimate)) + 
  geom_col() + 
  facet_wrap(~class, nrow = 1) + 
  scale_fill_viridis_d() + 
  theme_minimal() +
  theme(legend.position = "bottom") + 
  labs(fill = "Respuesta", 
       x = "¿Estaría dispuesto o no estaría dispuesto a
       permitir que en su casa vivieran personas ", 
       y = "Probabilidad de respuesta por clase") + 
  coord_flip()
## Joining, by = "variable"
## Joining, by = "outcome"

Análisis de clases latentes con covariadas

f.cov <- cbind(p7_1,p7_2,p7_3,p7_4,p7_5,p7_6,p7_7,p7_8,p7_9,p7_10)~sd2+sd4

set.seed(2018)
migracion %>% 
  dplyr::select(starts_with("p7_"), sd1, sd2, sd4) %>%
  mutate(sd2 = as.numeric(str_trim(as.character(sd2)))) %>% 
  mutate_all(na_if, "NS") %>% 
  mutate_all(na_if, "NC") %>% 
  drop_na %>%
  mutate(sd4 = str_trim(as.character(sd4))) %>%
  mutate(sd4 = recode(sd4, 
  "Preescolar" = "Pri o <", 
  "Ninguno" = "Pri o <", 
  "Primaria" = "Pri o <",
  "Preparatoria o Bachillerato" = "Prepa", 
  "Carrera técnica" = "Prepa", 
  "Normal" = "Prepa", 
  "Licenciatura (Profesional)" = "Univ", 
  "Maestría" = "Univ", 
  "Doctorado" = "Univ"
  )) %>%  
  mutate_if(is.factor, droplevels) %>% 
  mutate(sd4 = fct_relevel(sd4, "Pri o <")) -> datos
## Warning in evalq(as.numeric(str_trim(as.character(sd2))), <environment>):
## NAs introducidos por coerción
  poLCA(f.cov, datos, nclass = 3, nrep = 6) -> LCAcov_3clases
## Model 1: llik = -7228.091 ... best llik = -7228.091
## Model 2: llik = -7067.375 ... best llik = -7067.375
## Model 3: llik = -7067.375 ... best llik = -7067.375
## Model 4: llik = -7067.375 ... best llik = -7067.375
## Model 5: llik = -7259.869 ... best llik = -7067.375
## Model 6: llik = -7067.375 ... best llik = -7067.375
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $p7_1
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.1928 0.1318 0.6754
## class 2:  0.3161 0.5606 0.1233
## class 3:  0.9231 0.0279 0.0490
## 
## $p7_2
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.1412 0.1639 0.6949
## class 2:  0.2884 0.6074 0.1043
## class 3:  0.9592 0.0255 0.0154
## 
## $p7_3
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.2045 0.2182 0.5773
## class 2:  0.3070 0.6161 0.0769
## class 3:  0.9528 0.0370 0.0103
## 
## $p7_4
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.0000 0.0793 0.9207
## class 2:  0.1361 0.5146 0.3493
## class 3:  0.7924 0.1158 0.0918
## 
## $p7_5
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.0807 0.1362 0.7831
## class 2:  0.2087 0.6676 0.1237
## class 3:  0.9251 0.0499 0.0249
## 
## $p7_6
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.0650 0.0719 0.8631
## class 2:  0.0942 0.5714 0.3344
## class 3:  0.7528 0.1239 0.1233
## 
## $p7_7
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.3258 0.1905 0.4837
## class 2:  0.3075 0.6147 0.0778
## class 3:  0.9611 0.0277 0.0112
## 
## $p7_8
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.0865 0.1770 0.7366
## class 2:  0.2258 0.6863 0.0879
## class 3:  0.9582 0.0336 0.0082
## 
## $p7_9
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.1170 0.2112 0.6719
## class 2:  0.2238 0.7139 0.0623
## class 3:  0.9724 0.0209 0.0067
## 
## $p7_10
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.0070 0.0645 0.9285
## class 2:  0.1278 0.5100 0.3622
## class 3:  0.8175 0.0775 0.1050
## 
## Estimated class population shares 
##  0.1504 0.3289 0.5207 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.1496 0.3267 0.5237 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## 2 / 1 
##               Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)       0.40029     0.44898    0.892     0.373
## sd2              -0.00746     0.00796   -0.938     0.349
## sd4Prepa          0.97084     0.32532    2.984     0.003
## sd4Secundaria     0.94808     0.29100    3.258     0.001
## sd4Univ           0.74154     0.45650    1.624     0.105
## ========================================================= 
## 3 / 1 
##               Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)       1.47085     0.40736    3.611     0.000
## sd2              -0.01907     0.00739   -2.581     0.010
## sd4Prepa          0.72662     0.29188    2.489     0.013
## sd4Secundaria     0.55436     0.26535    2.089     0.037
## sd4Univ           1.29784     0.39714    3.268     0.001
## ========================================================= 
## number of observations: 1056 
## number of estimated parameters: 70 
## residual degrees of freedom: 986 
## maximum log-likelihood: -7067.375 
##  
## AIC(3): 14274.75
## BIC(3): 14622.11
## X^2(3): 162215.4 (Chi-square goodness of fit) 
##  
## ALERT: estimation algorithm automatically restarted with new initial values 
## 
LCAcov_3clases %>% 
  tidy %>% 
  left_join(dplyr::rename(diccionario, variable = nombre)) %>% 
  left_join(etiquetas_item) %>% 
  mutate(etiquetas = str_remove(etiquetas, 
                                "7 ¿Estaría dispuesto o no estaría dispuesto a permitir que en su casa vivieran personas "), 
         etiquetas = str_wrap(etiquetas, 30),
         etiqueta_item = factor(etiqueta_item, 
                                levels = c("No", "Sí, en parte", "Sí", "NS", "NC")), 
         class = paste("Clase", class)) %>%
  ggplot(aes(x = etiquetas, fill = etiqueta_item, y = estimate)) + 
  geom_col() + 
  facet_wrap(~class, nrow = 1) + 
  scale_fill_viridis_d() + 
  theme_minimal() +
  theme(legend.position = "bottom") + 
  labs(fill = "Respuesta", 
       x = "¿Estaría dispuesto o no estaría dispuesto a
       permitir que en su casa vivieran personas ", 
       y = "Probabilidad de respuesta por clase") + 
  coord_flip()
## Joining, by = "variable"
## Joining, by = "outcome"

plot(effects::allEffects(LCAcov_3clases))