Visualizations
These visualizations are intended as a way to test the integrity and utility of the data export and cleaning workflow.
Recruiting calls data
Cumulative screening/recruiting calls by site.
# Load `screen_df` from the targets data store at ../_targets.
targets::tar_load(screen_df, store = "../_targets")

# Pass it through the project helper add_n_calls_to_demog() (defined elsewhere).
df <- screen_df |>
  add_n_calls_to_demog()
Calls by site
#' Bar chart of the number of rows (calls) per site
#'
#' Drops rows with a missing `site_id`, then draws one bar per site, with
#' sites ordered by decreasing frequency and each bar filled by site.
#'
#' @param df A data frame containing a `site_id` column.
#' @return A ggplot object.
calls_by_site_plot <- function(df) {
  # Namespace-qualified calls replace the former `require(dplyr)`:
  # `require()` merely returns FALSE when the package is missing and
  # attaches it to the search path as a side effect of calling this function.
  df |>
    dplyr::filter(!is.na(site_id)) |>
    ggplot2::ggplot() +
    ggplot2::aes(forcats::fct_infreq(site_id), fill = site_id) +
    ggplot2::geom_bar() +
    ggplot2::labs(x = "site") +
    ggplot2::theme(
      # Rotate the site labels so they do not overlap.
      axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1),
      # The x axis already names each site, so the fill legend is redundant.
      legend.position = "none"
    )
}
# NOTE(review): this plots `screen_df`, not the `df` produced by
# add_n_calls_to_demog() above — confirm that is intended.
screen_df |>
  calls_by_site_plot()
Demographics
Child age
Child age in months (`child_age_mos`) by `child_sex`.
# Histogram of child age, filled by sex; rows missing either variable are
# dropped first.
screen_df |>
  dplyr::filter(!is.na(child_age_mos), !is.na(child_sex)) |>
  ggplot() +
  aes(x = child_age_mos, fill = child_sex) +
  geom_histogram(bins = 50)
Some of the code to clean the `screen_df` variables could be incorporated into an earlier stage of the workflow.
Language to child
Language(s) spoken to child by `child_sex`.
# Replace spaces with underscores in both language columns so each level
# prints as a single token in the cross-tab headers.
df <- screen_df |>
  dplyr::mutate(
    language_to_child =
      stringr::str_replace_all(language_to_child, " ", "_"),
    language_spoken_home =
      stringr::str_replace_all(language_spoken_home, " ", "_")
  )

# Counts of child sex by language spoken to the child.
xtabs(~ child_sex + language_to_child, data = df)
## language_to_child
## child_sex english english_other english_spanish english_spanish_other spanish
## female 187 2 28 2 10
## male 181 4 41 1 7
Language spoken at home
# Counts of child sex by language spoken at home.
xtabs(~ child_sex + language_spoken_home, data = df)
## language_spoken_home
## child_sex english english_other english_spanish english_spanish_other spanish
## female 188 2 28 0 10
## male 173 2 45 2 11
To child vs. at home
# Rows: language spoken to the child; columns: language spoken at home.
xtabs(~ language_to_child + language_spoken_home, data = df)
## language_spoken_home
## language_to_child english english_other english_spanish english_spanish_other spanish
## english 347 1 14 2 4
## english_other 1 3 0 0 0
## english_spanish 10 0 54 0 5
## english_spanish_other 3 0 0 0 0
## spanish 0 0 5 0 12
Child born on due date
# Counts of child sex by the born-on-due-date response.
xtabs(~ child_sex + child_bornonduedate, data = screen_df)
## child_bornonduedate
## child_sex yes
## female 151
## male 162
There are a large number of NAs. Cross-check with the earlier version of the survey. It’s possible that this question was not asked early on.
Child weight
Must convert pounds and ounces to decimal pounds.
# Birth weight in decimal pounds (16 ounces per pound).
df <- screen_df |>
  dplyr::mutate(
    birth_weight_lbs = child_weight_pounds + child_weight_ounces / 16
  )

# Histogram of positive, non-missing birth weights, filled by sex.
df |>
  dplyr::filter(
    !is.na(birth_weight_lbs),
    !is.na(child_sex),
    birth_weight_lbs > 0
  ) |>
  ggplot() +
  aes(x = birth_weight_lbs, fill = child_sex) +
  geom_histogram(binwidth = 0.33) +
  theme(legend.position = "bottom", legend.title = element_blank())
Birth complications
# Counts of child sex by reported birth complications.
xtabs(~ child_sex + child_birth_complications, data = screen_df)
## child_birth_complications
## child_sex no yes
## female 207 19
## male 212 20
Major illnesses or injuries
# Counts of child sex by reported major illnesses or injuries.
xtabs(~ child_sex + major_illnesses_injuries, data = screen_df)
## major_illnesses_injuries
## child_sex no yes
## female 219 7
## male 221 11
Duration
This work is yet to be done. The time stamps need to be reformatted prior to visualization.
Nap hours
Some recoding must be done here to handle strings that can’t easily be converted to numbers.
# Coerce the recorded nap hours to numeric. Entries that cannot be parsed
# as a number become NA, which is what triggers the coercion warning.
df <- dplyr::mutate(screen_df, nap_hrs = as.numeric(child_nap_hours))
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `nap_hrs = as.numeric(child_nap_hours)`.
## Caused by warning:
## ! NAs introduced by coercion
# Histogram of numeric nap hours, filled by sex; rows missing either
# variable are dropped first.
df |>
  dplyr::filter(!is.na(nap_hrs), !is.na(child_sex)) |>
  ggplot() +
  aes(x = nap_hrs, fill = child_sex) +
  geom_histogram(bins = 10) +
  theme(legend.position = "bottom", legend.title = element_blank())
Sleep location
# Counts of child sex by sleep location.
xtabs(~ child_sex + child_sleep_location, data = screen_df)
## child_sleep_location
## child_sex bed_parent crib_parent crib_separate crib_sibling other
## female 14 23 158 9 22
## male 21 29 163 11 7
Mother
Age at childbirth
# Histogram of mother's age at childbirth, filled by child sex; rows
# missing either variable are dropped first.
screen_df |>
  dplyr::filter(!is.na(mother_childbirth_age), !is.na(child_sex)) |>
  ggplot() +
  aes(x = mother_childbirth_age, fill = child_sex) +
  geom_histogram(bins = 25) +
  theme(legend.position = "bottom", legend.title = element_blank())
Clearly, there are some impossible (erroneous) maternal ages > 100. Here are details:
# Rows whose recorded maternal age at childbirth exceeds 100.
old_moms <- dplyr::filter(screen_df, mother_childbirth_age > 100)

# Show just the identifying columns and the suspect age as an HTML table.
old_moms |>
  dplyr::select(submit_date, site_id, sub_num, mother_childbirth_age) |>
  knitr::kable(format = "html") |>
  kableExtra::kable_classic()
submit_date | site_id | sub_num | mother_childbirth_age |
---|---|---|---|
2020-02-25 | GEORG | 007 | 118.92 |
2020-02-25 | GEORG | 006 | 118.01 |
2022-07-12 | VBLTU | 005 | 121.22 |
2023-04-11 | NYUNI | 065 | 136.55 |
Mother’s race and ethnicity
# Map the variant codes for race and ethnicity onto one label each;
# values not listed are left unchanged by recode().
df <- screen_df |>
  dplyr::mutate(
    mother_race = dplyr::recode(
      mother_race,
      morethanone = "more_than_one",
      americanindian = "american_indian"
    ),
    mother_ethnicity = dplyr::recode(
      mother_ethnicity,
      hispanic_or_la = "hispanic",
      not_hispanic_o = "not_hispanic",
      nothispanic = "not_hispanic"
    )
  )

# Counts of mother's race by mother's ethnicity.
xtabs(~ mother_race + mother_ethnicity, data = df)
## mother_ethnicity
## mother_race hispanic not_hispanic refused
## american_indian 3 0 0
## asian 0 16 0
## black 0 13 0
## more_than_one 14 6 0
## other 24 5 1
## refused 1 1 2
## white 48 318 0
Home visit data
# Load `home_visit_df` from the targets data store at ../_targets.
targets::tar_load(home_visit_df, store = "../_targets")
Demographics
Child age
Child age in months (`age_group`) by `child_sex`.
Note: The child’s exact age in months is part of the Databrary-related data. That is on the work plan.
# Bar chart of home visits per age group, filled by sex; rows missing
# either variable are dropped first.
home_visit_df |>
  dplyr::filter(!is.na(age_group), !is.na(child_sex)) |>
  ggplot() +
  aes(x = age_group, fill = child_sex) +
  geom_bar() +
  theme(legend.position = "bottom", legend.title = element_blank())
Language exposure
# Replace spaces with underscores so each language level prints as a
# single token in the cross-tab header.
df <- home_visit_df |>
  dplyr::mutate(
    language_child = stringr::str_replace_all(language_child, " ", "_")
  )

# Counts of child sex by language exposure.
xtabs(~ child_sex + language_child, data = df)
## language_child
## child_sex english english_spanish
## female 131 24
## male 130 41
Locomotor milestones
# Locomotor-milestone subset: keep the identifying columns, give the three
# onset columns short names while selecting them, and coerce them to numeric.
play_loco <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    language_child,
    site_id,
    subject_number,
    walk_mos_who = locomotor_milestones.who_walk.who_walk_onset_mo,
    walk_mos_kea = locomotor_milestones.k_walk.k_walk_onset_mo,
    crawl_mos = locomotor_milestones.crawl_onset.crawl_onset_mo
  ) |>
  dplyr::mutate(
    walk_mos_who = as.numeric(walk_mos_who),
    walk_mos_kea = as.numeric(walk_mos_kea),
    crawl_mos = as.numeric(crawl_mos)
  )
Check for anomalous values
# Lower bounds that the chunks below compare `crawl_mos` and the
# `walk_mos_*` columns against. Units are presumably months — confirm.
crawl_mos_min <- 4
walk_mos_min <- 6
Anomalous crawling onset
# HTML table of records whose crawl onset falls below the threshold.
play_loco |>
  dplyr::select(site_id, subject_number, crawl_mos) |>
  dplyr::filter(crawl_mos < crawl_mos_min) |>
  knitr::kable(format = "html") |>
  kableExtra::kable_classic()
site_id | subject_number | crawl_mos |
---|---|---|
CHOPH | 003 | 3.72 |
Crawl onset
# Histogram of crawl onset, filled by sex.
# Anomalous values are defined as `crawl_mos < crawl_mos_min`, so the
# complement is `>=`: a value exactly at the threshold was previously shown
# in neither the anomaly table nor this plot.
# dplyr::filter() already drops rows where the comparison is NA, so no
# separate `!is.na()` is needed.
play_loco |>
  dplyr::filter(crawl_mos >= crawl_mos_min) |>
  ggplot() +
  aes(x = crawl_mos, fill = child_sex) +
  geom_histogram(bins = 12) +
  theme(legend.position = "bottom", legend.title = element_blank())
Walk onset
# Histogram of walk onset (`walk_mos_kea`), filled by sex.
# `>=` keeps values exactly at `walk_mos_min`; the strict `>` used before
# silently excluded them. dplyr::filter() already drops rows where the
# comparison is NA, so no separate `!is.na()` is needed.
play_loco |>
  dplyr::filter(walk_mos_kea >= walk_mos_min) |>
  ggplot() +
  aes(x = walk_mos_kea, fill = child_sex) +
  geom_histogram(bins = 10) +
  theme(legend.position = "bottom")
# Histogram of walk onset (`walk_mos_who`), filled by sex.
# `>=` keeps values exactly at `walk_mos_min`; the strict `>` used before
# silently excluded them. dplyr::filter() already drops rows where the
# comparison is NA, so no separate `!is.na()` is needed.
play_loco |>
  dplyr::filter(walk_mos_who >= walk_mos_min) |>
  ggplot() +
  aes(x = walk_mos_who, fill = child_sex) +
  geom_histogram(bins = 12) +
  theme(legend.position = "bottom", legend.title = element_blank())
# Scatter plot of the two walk-onset measures, coloured by sex, with a
# per-group linear fit.
# `>=` keeps values exactly at `walk_mos_min` (the strict `>` dropped them);
# dplyr::filter() already drops rows where a comparison is NA.
walk_p <- play_loco |>
  dplyr::filter(
    walk_mos_who >= walk_mos_min,
    walk_mos_kea >= walk_mos_min
  ) |>
  ggplot() +
  aes(x = walk_mos_who, y = walk_mos_kea, color = child_sex) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Points outside these limits are removed by ggplot2, with a warning.
  xlim(8, 18) +
  ylim(8, 18) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    aspect.ratio = 1
  )

# Add per-group marginal densities. ggMarginal() takes the data and the
# x/y variables from the plot itself; `data`, `x` and `y` are ignored when
# `p` is supplied, so they are not passed.
ggExtra::ggMarginal(
  walk_p,
  type = "density",
  margins = "both",
  groupColour = TRUE,
  groupFill = TRUE
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
# Scatter plot of crawl onset against walk onset (`walk_mos_kea`), coloured
# by sex, with a per-group linear fit.
# `>=` keeps values exactly at the thresholds (the strict `>` dropped them);
# dplyr::filter() already drops rows where a comparison is NA.
walk_p <- play_loco |>
  dplyr::filter(
    crawl_mos >= crawl_mos_min,
    walk_mos_kea >= walk_mos_min
  ) |>
  ggplot() +
  aes(x = crawl_mos, y = walk_mos_kea, color = child_sex) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    aspect.ratio = 1
  )

# Add per-group marginal densities. ggMarginal() takes the data and the
# x/y variables from the plot itself; `data`, `x` and `y` are ignored when
# `p` is supplied. The previous call named `walk_mos_who` as x although this
# plot's x variable is `crawl_mos`, so those arguments are dropped.
ggExtra::ggMarginal(
  walk_p,
  type = "density",
  margins = "both",
  groupColour = TRUE,
  groupFill = TRUE
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Health
Feeding
# Feeding subset: keep the identifying columns, give the two feeding
# columns short names while selecting them, and coerce the solid-food age
# to numeric.
feeding <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    site_id,
    subject_number,
    language_child,
    breastfeed = health.feeding_nutrition.breastfeed,
    solid_food_mos = health.feeding_nutrition.solidfood_age
  ) |>
  dplyr::mutate(solid_food_mos = as.numeric(solid_food_mos))

# Counts of child sex by breastfeeding response.
xtabs(~ child_sex + breastfeed, data = feeding)
## breastfeed
## child_sex donotknow no refused yes
## female 1 3 0 150
## male 0 10 1 156
# Histogram of solid-food onset, coloured and filled by sex. No filtering
# is applied here, so missing values are removed by ggplot2 with a warning.
ggplot(feeding) +
  aes(x = solid_food_mos, color = child_sex, fill = child_sex) +
  geom_histogram(bins = 15) +
  theme(legend.title = element_blank())
## Warning: Removed 5 rows containing non-finite values (`stat_bin()`).
Clearly, there are some impossible values here.
# HTML table of records whose solid-food onset exceeds 12.
feeding |>
  dplyr::select(site_id, subject_number, solid_food_mos) |>
  dplyr::filter(solid_food_mos > 12) |>
  knitr::kable(format = "html") |>
  kableExtra::kable_classic()
site_id | subject_number | solid_food_mos |
---|---|---|
NYUNI | 996 | 999 |
Smoking/drinking
# Smoking/drinking subset: keep the grouping columns and give the two
# pregnancy columns short names while selecting them.
smoking_drinking <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    language_child,
    preg_smoking = health.smoking.pregnant_smoking,
    preg_drinking = health.drinking.pregnant_drinking
  )

# Counts of smoking during pregnancy by drinking during pregnancy.
xtabs(~ preg_smoking + preg_drinking, data = smoking_drinking)
## preg_drinking
## preg_smoking no refused yes
## no 257 0 61
## refused 0 1 0
Sleeping position
# Sleeping-position subset: keep the grouping columns and give the
# position column a short name while selecting it.
sleeping_pos <- home_visit_df |>
  dplyr::select(
    age_group,
    child_sex,
    language_child,
    child_sleeping_position = health.general_health.child_sleeping_position
  )

# Counts per sleeping position.
xtabs(~ child_sleeping_position, data = sleeping_pos)
## child_sleeping_position
## back changed donotknow other refused side stomach_side
## 305 4 1 2 1 2 6