Visualizations

These visualizations are intended as a way to test the integrity and utility of the data export and cleaning workflow.

Recruiting calls data

Cumulative screening/recruiting calls by site.

# Load the screening data frame from the targets store, then build `df`
# from it.
# NOTE(review): add_n_calls_to_demog() is defined outside this section; its
# exact output is not visible here.
targets::tar_load(screen_df, store = "../_targets")
df <- screen_df |>
  add_n_calls_to_demog()

Calls across time

# Draw the call time series from `df`; plot_call_timeseries() is a helper
# defined outside this section.
plot_call_timeseries(df)
Cumulative screening calls by year and site

Figure 10: Cumulative screening calls by year and site

Calls by site

# Bar chart of the number of rows (presumably one per screening call) per site.
#
# Rows with a missing `site_id` are dropped, bars are ordered from the most to
# the least frequent site, and the fill legend is hidden because it would only
# repeat the x-axis labels.
#
# @param df A data frame containing a `site_id` column.
# @return A ggplot object.
calls_by_site_plot <- function(df) {
  # Fail early with a clear message instead of relying on require(), which
  # only returns FALSE when a package is missing and attaches it otherwise.
  stopifnot(is.data.frame(df), "site_id" %in% names(df))

  df |>
    dplyr::filter(!is.na(site_id)) |>
    ggplot() +
    aes(forcats::fct_infreq(site_id), fill = site_id) +
    geom_bar() +
    theme(axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 1
    )) + # Rotate text
    labs(x = "site") +
    theme(legend.position = "none")
}

# Render the per-site bar chart for the screening data.
calls_by_site_plot(screen_df)
Cumulative screening calls by site

Figure 11: Cumulative screening calls by site

Demographics

Child age

Child age in months (child_age_mos) by child_sex.

# Stacked histogram of child age (months) by sex; rows missing either
# variable are excluded first.
ggplot(
  dplyr::filter(screen_df, !is.na(child_age_mos), !is.na(child_sex)),
  aes(child_age_mos, fill = child_sex)
) +
  geom_histogram(bins = 50)
Histogram of child age at time of recruiting call.

Figure 12: Histogram of child age at time of recruiting call.

Some of the code to clean the screen_df variables could be incorporated into an earlier stage of the workflow.

Language to child

Language(s) spoken to child by child_sex.

# Replace spaces with underscores in both language columns so the category
# labels print compactly, then cross-tabulate language to child by sex.
df <- screen_df |>
  dplyr::mutate(
    dplyr::across(
      c(language_to_child, language_spoken_home),
      \(x) stringr::str_replace_all(x, " ", "_")
    )
  )
xtabs(~ child_sex + language_to_child, data = df)
##          language_to_child
## child_sex english english_other english_spanish english_spanish_other spanish
##    female     187             2              28                     2      10
##    male       181             4              41                     1       7

Language spoken at home

# Cross-tabulate child sex against the language(s) spoken at home.
xtabs(~ child_sex + language_spoken_home, data = df)
##          language_spoken_home
## child_sex english english_other english_spanish english_spanish_other spanish
##    female     188             2              28                     0      10
##    male       173             2              45                     2      11

To child vs. at home

# Compare the language(s) spoken to the child with those spoken at home.
xtabs(~ language_to_child + language_spoken_home, data = df)
##                        language_spoken_home
## language_to_child       english english_other english_spanish english_spanish_other spanish
##   english                   347             1              14                     2       4
##   english_other               1             3               0                     0       0
##   english_spanish            10             0              54                     0       5
##   english_spanish_other       3             0               0                     0       0
##   spanish                     0             0               5                     0      12

Child born on due date

# Cross-tabulate child sex against the born-on-due-date response.
xtabs(~ child_sex + child_bornonduedate, data = screen_df)
##          child_bornonduedate
## child_sex yes
##    female 151
##    male   162

There are a large number of NAs. Cross-check with the earlier version of the survey. It’s possible that this question was not asked early on.

Child weight

Must convert pounds and ounces to decimal pounds.

# Combine pounds and ounces into decimal pounds (16 oz per lb).
df <- dplyr::mutate(
  screen_df,
  birth_weight_lbs = child_weight_pounds + child_weight_ounces / 16
)

# Histogram of positive, non-missing birth weights, stacked by sex.
df |>
  dplyr::filter(
    !is.na(birth_weight_lbs),
    !is.na(child_sex),
    birth_weight_lbs > 0
  ) |>
  ggplot(aes(x = birth_weight_lbs, fill = child_sex)) +
  geom_histogram(binwidth = 0.33) +
  theme(legend.position = "bottom", legend.title = element_blank())

Birth complications

# Cross-tabulate child sex against reported birth complications.
xtabs(~ child_sex + child_birth_complications, data = screen_df)
##          child_birth_complications
## child_sex  no yes
##    female 207  19
##    male   212  20

Major illnesses or injuries

# Cross-tabulate child sex against reported major illnesses or injuries.
xtabs(~ child_sex + major_illnesses_injuries, data = screen_df)
##          major_illnesses_injuries
## child_sex  no yes
##    female 219   7
##    male   221  11
Child sleep
Bed time
Wake time
Duration

This work has yet to be done. The time stamps need to be reformatted prior to visualization.

Nap hours

Some recoding must be done here to handle strings that can’t easily be converted to numbers.

# Coerce the free-text nap hours to numeric; entries that cannot be parsed
# become NA (with a coercion warning).
df <- dplyr::mutate(screen_df, nap_hrs = as.numeric(child_nap_hours))
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `nap_hrs = as.numeric(child_nap_hours)`.
## Caused by warning:
## ! NAs introduced by coercion
# Histogram of nap hours by sex, excluding rows missing either variable.
ggplot(
  dplyr::filter(df, !is.na(nap_hrs), !is.na(child_sex)),
  aes(x = nap_hrs, fill = child_sex)
) +
  geom_histogram(bins = 10) +
  theme(legend.position = "bottom", legend.title = element_blank())
Sleep location
# Cross-tabulate child sex against where the child sleeps.
xtabs(~ child_sex + child_sleep_location, data = screen_df)
##          child_sleep_location
## child_sex bed_parent crib_parent crib_separate crib_sibling other
##    female         14          23           158            9    22
##    male           21          29           163           11     7

Mother

Age at childbirth
# Histogram of maternal age at childbirth by child sex; rows missing either
# variable are excluded.
ggplot(
  dplyr::filter(screen_df, !is.na(mother_childbirth_age), !is.na(child_sex)),
  aes(x = mother_childbirth_age, fill = child_sex)
) +
  geom_histogram(bins = 25) +
  theme(legend.position = "bottom", legend.title = element_blank())

Clearly, there are some impossible (erroneous) maternal ages > 100. Here are details:

# Keep the records whose maternal age at childbirth exceeds 100.
old_moms <- dplyr::filter(screen_df, mother_childbirth_age > 100)

# Show the identifying columns for those records as an HTML table.
kableExtra::kable_classic(
  knitr::kable(
    dplyr::select(old_moms, submit_date, site_id, sub_num, mother_childbirth_age),
    format = 'html'
  )
)
submit_date site_id sub_num mother_childbirth_age
2020-02-25 GEORG 007 118.92
2020-02-25 GEORG 006 118.01
2022-07-12 VBLTU 005 121.22
2023-04-11 NYUNI 065 136.55
Mother’s race and ethnicity
# Harmonise the race and ethnicity codes to consistent snake_case labels,
# then cross-tabulate the two variables.
df <- screen_df |>
  dplyr::mutate(
    mother_race = dplyr::recode(
      mother_race,
      morethanone = "more_than_one",
      americanindian = "american_indian"
    ),
    mother_ethnicity = dplyr::recode(
      mother_ethnicity,
      hispanic_or_la = "hispanic",
      not_hispanic_o = "not_hispanic",
      nothispanic = "not_hispanic"
    )
  )
xtabs(~ mother_race + mother_ethnicity, data = df)
##                  mother_ethnicity
## mother_race       hispanic not_hispanic refused
##   american_indian        3            0       0
##   asian                  0           16       0
##   black                  0           13       0
##   more_than_one         14            6       0
##   other                 24            5       1
##   refused                1            1       2
##   white                 48          318       0

Home visit data

# Load the home visit data frame from the targets store.
targets::tar_load(home_visit_df, store="../_targets")

Demographics

Child age

Child age in months (age_group) by child_sex.

Note: The child’s exact age in months is part of the Databrary-related data. That is on the work plan.

# Bar chart of participants per age group, stacked by sex; rows missing
# either variable are excluded.
ggplot(
  dplyr::filter(home_visit_df, !is.na(age_group), !is.na(child_sex)),
  aes(age_group, fill = child_sex)
) +
  geom_bar() +
  theme(legend.position = "bottom", legend.title = element_blank())
Participants by age group and sex

Figure 13: Participants by age group and sex

Language exposure

# Replace spaces with underscores in the language labels, then cross-tabulate
# language exposure by sex.
df <- dplyr::mutate(
  home_visit_df,
  language_child = stringr::str_replace_all(language_child, " ", "_")
)
xtabs(~ child_sex + language_child, data = df)
##          language_child
## child_sex english english_spanish
##    female     131              24
##    male       130              41

Locomotor milestones

# Extract the locomotor milestone variables under short names and convert the
# three onset ages to numeric.
play_loco <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    language_child,
    site_id,
    subject_number,
    walk_mos_who = locomotor_milestones.who_walk.who_walk_onset_mo,
    walk_mos_kea = locomotor_milestones.k_walk.k_walk_onset_mo,
    crawl_mos = locomotor_milestones.crawl_onset.crawl_onset_mo
  ) |>
  dplyr::mutate(
    dplyr::across(c(walk_mos_who, walk_mos_kea, crawl_mos), as.numeric)
  )

Check for anomalous values

# Thresholds compared against the onset ages (months) below: values under
# these are listed as anomalous and excluded from the onset plots.
crawl_mos_min <- 4
walk_mos_min <- 6
Anomalous crawling onset
# List participants whose crawling onset is below crawl_mos_min.
play_loco |>
  dplyr::filter(crawl_mos < crawl_mos_min) |>
  dplyr::select(site_id, subject_number, crawl_mos) |>
  knitr::kable(format = 'html') |>
  kableExtra::kable_classic()
site_id subject_number crawl_mos
CHOPH 003 3.72
Anomalous walking onset (KEA criteria)
# List participants whose KEA walking onset is below walk_mos_min.
play_loco |>
  dplyr::filter(walk_mos_kea < walk_mos_min) |>
  dplyr::select(site_id, subject_number, walk_mos_kea) |>
  knitr::kable(format = 'html') |>
  kableExtra::kable_classic()
site_id subject_number walk_mos_kea
Anomalous walking onset (WHO criteria)
# List participants whose WHO walking onset is below walk_mos_min.
play_loco |>
  dplyr::filter(walk_mos_who < walk_mos_min) |>
  dplyr::select(site_id, subject_number, walk_mos_who) |>
  knitr::kable(format = 'html') |>
  kableExtra::kable_classic()
site_id subject_number walk_mos_who

Crawl onset

# Histogram of crawling onset age (months) by sex.
# Use >= so a value exactly at the threshold is plotted: the anomaly table
# flags only values strictly below crawl_mos_min, so `>` would silently drop
# boundary values from both views. filter() already discards rows where the
# comparison is NA, so no separate is.na() test is needed.
play_loco |>
  dplyr::filter(crawl_mos >= crawl_mos_min) |>
  ggplot() +
  aes(crawl_mos, fill = child_sex) +
  geom_histogram(bins = 12) +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank())
Age of crawling onset (mos) by sex

Figure 14: Age of crawling onset (mos) by sex

Walk onset

# Histogram of walking onset age (months, KEA criteria) by sex.
# Use >= so a value exactly at the threshold is plotted: the anomaly table
# flags only values strictly below walk_mos_min. filter() already discards
# rows where the comparison is NA.
# The legend title is blanked to match the other onset histograms.
play_loco |>
  dplyr::filter(walk_mos_kea >= walk_mos_min) |>
  ggplot() +
  aes(walk_mos_kea, fill = child_sex) +
  geom_histogram(bins = 10) +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank())
Age (mos) of walking onset (KEA criteria) by sex

Figure 15: Age (mos) of walking onset (KEA criteria) by sex

# Histogram of walking onset age (months, WHO criteria) by sex.
# Use >= so a value exactly at the threshold is plotted: the anomaly table
# flags only values strictly below walk_mos_min. filter() already discards
# rows where the comparison is NA.
play_loco |>
  dplyr::filter(walk_mos_who >= walk_mos_min) |>
  ggplot() +
  aes(walk_mos_who, fill = child_sex) +
  geom_histogram(bins = 12) +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank())
Age (mos) of walking onset (WHO criteria) by sex

Figure 16: Age (mos) of walking onset (WHO criteria) by sex

# Scatter plot of walking onset under WHO vs. KEA criteria, coloured by sex,
# with a per-sex linear fit. Axes are clipped to 8-18 months, so points
# outside that range are dropped (ggplot2 reports them as removed rows).
# Use >= so values exactly at walk_mos_min are kept: the anomaly tables flag
# only values strictly below it. filter() already discards NA comparisons.
walk_p <- play_loco |>
  dplyr::filter(
    walk_mos_who >= walk_mos_min,
    walk_mos_kea >= walk_mos_min
  ) |>
  ggplot() +
  aes(walk_mos_who, walk_mos_kea, color = child_sex) +
  geom_point() +
  geom_smooth(method = "lm") +
  xlim(8, 18) +
  ylim(8, 18) +
  theme(legend.position = "bottom") +
  theme(aspect.ratio = 1) +
  theme(legend.title = element_blank())

# Add marginal density plots. ggMarginal() takes the data and the x/y
# variables from the plot object, so they are not passed again.
ggExtra::ggMarginal(
  walk_p,
  type = "density",
  margins = "both",
  groupColour = TRUE,
  groupFill = TRUE
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
Walking onset by WHO vs. KEA criteria

Figure 17: Walking onset by WHO vs. KEA criteria

# Scatter plot of crawling onset vs. walking onset (KEA criteria), coloured
# by sex, with a per-sex linear fit.
# Use >= so values exactly at the thresholds are kept: the anomaly tables flag
# only values strictly below them. filter() already discards NA comparisons.
walk_p <- play_loco |>
  dplyr::filter(
    crawl_mos >= crawl_mos_min,
    walk_mos_kea >= walk_mos_min
  ) |>
  ggplot() +
  aes(crawl_mos, walk_mos_kea, color = child_sex) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme(legend.position = "bottom") +
  theme(aspect.ratio = 1) +
  theme(legend.title = element_blank())

# Add marginal density plots. ggMarginal() takes the data and the x/y
# variables from the plot object; the previously passed walk_mos_who argument
# was ignored and did not match this plot's x variable (crawl_mos).
ggExtra::ggMarginal(
  walk_p,
  type = "density",
  margins = "both",
  groupColour = TRUE,
  groupFill = TRUE
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Walking onset vs. Crawling

Figure 18: Walking onset vs. Crawling

Health

Feeding

# Extract the feeding variables under short names and convert the age at
# solid-food introduction to numeric, then tabulate breastfeeding by sex.
feeding <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    site_id,
    subject_number,
    language_child,
    breastfeed = health.feeding_nutrition.breastfeed,
    solid_food_mos = health.feeding_nutrition.solidfood_age
  ) |>
  dplyr::mutate(solid_food_mos = as.numeric(solid_food_mos))
xtabs(~ child_sex + breastfeed, data = feeding)
##          breastfeed
## child_sex donotknow  no refused yes
##    female         1   3       0 150
##    male           0  10       1 156
# Histogram of age at solid-food introduction by sex. Missing values are not
# filtered here, so ggplot2 reports them as removed rows.
ggplot(
  feeding,
  aes(x = solid_food_mos, color = child_sex, fill = child_sex)
) +
  geom_histogram(bins = 15) +
  theme(legend.title = element_blank())
## Warning: Removed 5 rows containing non-finite values (`stat_bin()`).
Age at introduction of solid foods

Figure 19: Age at introduction of solid foods

Clearly, there are some impossible values here.

# List participants whose reported solid-food age exceeds 12.
feeding |>
  dplyr::filter(solid_food_mos > 12) |>
  dplyr::select(site_id, subject_number, solid_food_mos) |>
  knitr::kable(format = 'html') |>
  kableExtra::kable_classic()
site_id subject_number solid_food_mos
NYUNI 996 999

Smoking/drinking

# Extract the pregnancy smoking and drinking responses under short names,
# then cross-tabulate them.
smoking_drinking <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    language_child,
    preg_smoking = health.smoking.pregnant_smoking,
    preg_drinking = health.drinking.pregnant_drinking
  )

xtabs(~ preg_smoking + preg_drinking, data = smoking_drinking)
##             preg_drinking
## preg_smoking  no refused yes
##      no      257       0  61
##      refused   0       1   0

Sleeping position

# Extract the child sleeping position under a short name and tabulate it.
sleeping_pos <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    language_child,
    child_sleeping_position = health.general_health.child_sleeping_position
  )

xtabs(~ child_sleeping_position, data = sleeping_pos)
## child_sleeping_position
##         back      changed    donotknow        other      refused         side stomach_side 
##          305            4            1            2            1            2            6

Post-visit data

We load the post-visit survey data.

# Load the post-visit survey data from the targets store. The call is
# namespace-qualified, like the other loads in this document, so it does not
# depend on the targets package being attached.
targets::tar_load(post_visit_df, store = "../_targets")

# Report the number of rows and columns.
dim(post_visit_df)
## [1] 353 180

Cleaning this data is set aside for future work.