library(dplyr)
library(tidyr)
library(egor)
# =========================
# 1. LOAD WAVE 1 DATA
# =========================
# load() restores whatever is stored in an .rda file and returns the
# name(s) of the restored object(s); get() then fetches the data by that
# name, so the script never hard-codes the stored object names.
# NOTE(review): get() needs a single name, so each file is presumably
# expected to contain exactly one object -- confirm against the files.

# Alter-level file (also carries the alter-alter pair columns)
alt_obj <- load(file = "DS0001/36975-0001-Data.rda")
altw1 <- get(x = alt_obj)

# Ego-level file
ego_obj <- load(file = "DS0005/36975-0005-Data.rda")
egow1 <- get(x = ego_obj)

# Names of the alter-alter pair columns in the alter file: every unordered
# pair of alters 1..5, i.e. "N1_N2", "N1_N3", ..., "N3_N5", "N4_N5".
pair_vars <- local({
  pair_idx <- utils::combn(5, 2)
  paste0("N", pair_idx[1, ], "_N", pair_idx[2, ])
})
# =========================
# 2. EGO-LEVEL DATA
# =========================
# Recode the raw ego columns into analysis variables. transmute() keeps
# only the columns created below; distinct() then drops duplicated rows.
# Raw columns used: PRIM_KEY, GENDER, YEAR_PRELOAD, AGE_EX, A2A, A13A, A19A,
# B3B, G1, H3, H4A, H5A, H6A, J1A, J1B, K1, RACECATS1, K9A, K15, K22A.
# NOTE(review): as.numeric() on a factor returns the position of the level,
# not the code printed in its label -- confirm the two coincide for every
# variable recoded here.
egos <- egow1 %>%
  transmute(
    .egoID = as.character(PRIM_KEY),

    # ---- binary indicators (1 / 0, NA stays NA) ----
    # gender: 1 = female (code 2), 0 = male
    gender = if_else(as.numeric(GENDER) == 2, 1, 0),
    year_birth = YEAR_PRELOAD,
    # exact age in years
    age = as.numeric(AGE_EX),
    # married: 1 = married (code 1), 0 = otherwise
    married = if_else(as.numeric(A2A) == 1, 1, 0),
    # pets: 1 = yes, 0 = no
    pets = if_else(as.numeric(A13A) == 1, 1, 0),
    # home_owner: 1 = own, 0 = rent/other
    home_owner = if_else(as.numeric(A19A) == 1, 1, 0),
    # informal_groups: 1 = active in informal groups, 0 = not active
    informal_groups = if_else(as.numeric(B3B) == 1, 1, 0),

    # ---- health and health behaviour ----
    # self-rated health, ordered from "excellent" to "poor"
    health = factor(
      as.numeric(G1),
      levels = 1:5,
      labels = c("excellent", "very_good", "good", "fair", "poor"),
      ordered = TRUE
    ),
    # lose_weight_advice: 1 = advised to lose weight, 0 = otherwise
    lose_weight_advice = if_else(as.numeric(H3) == 1, 1, 0),
    # smoker: 1 = smoker, 0 = non-smoker
    smoker = if_else(as.numeric(H4A) == 1, 1, 0),
    # days per week drinking alcohol; the raw values are kept as they are
    drinking_days = as.numeric(H5A),
    # marijuana_use: 1 = used marijuana, 0 = otherwise
    marijuana_use = if_else(as.numeric(H6A) == 1, 1, 0),

    # ---- personality ----
    # extraversion: row mean of the reversed "reserved" item (6 - value) and
    # the "sociable" item; with na.rm = TRUE a respondent missing one item
    # keeps the value of the other
    extraversion = rowMeans(
      cbind(
        reserved_reversed = 6 - as.numeric(J1A),
        sociable = as.numeric(J1B)
      ),
      na.rm = TRUE
    ),

    # ---- socio-demographics ----
    # education as an ordered factor
    education = factor(
      as.numeric(K1),
      levels = 1:10,
      labels = c(
        "less_than_9th", "9th_to_12th_no_grad", "high_school", "ged",
        "some_college", "associate", "bachelor", "master",
        "professional_degree", "other"
      ),
      ordered = TRUE
    ),
    # race; values not listed in `levels` become NA
    race = factor(
      as.numeric(RACECATS1),
      levels = c(1, 2, 3, 4, 6, 7, 9),
      labels = c(
        "white", "black", "american_indian", "asian",
        "unknown", "pacific_islander", "mixed"
      )
    ),
    # country of birth collapsed to continent; repeated labels merge the
    # listed country codes into a single factor level
    birth_continent = factor(
      as.numeric(K9A),
      levels = c(
        1, 2, 4, 6, 11,           # north_america
        3, 7, 8, 9, 10, 12, 14,   # asia
        5, 13,                    # europe
        15                        # other
      ),
      labels = rep(
        c("north_america", "asia", "europe", "other"),
        times = c(5, 7, 2, 1)
      )
    ),
    # religion
    religion = factor(
      as.numeric(K15),
      levels = 1:7,
      labels = c(
        "protestant", "catholic", "jewish", "muslim",
        "buddhist", "other_religion", "no_religion"
      )
    ),
    # income brackets as an ordered factor
    income = factor(
      as.numeric(K22A),
      levels = 1:13,
      labels = c(
        "under_15k", "15k_25k", "25k_35k", "35k_45k", "45k_60k",
        "60k_75k", "75k_100k", "100k_125k", "125k_150k", "150k_200k",
        "200k_300k", "300k_500k", "500k_plus"
      ),
      ordered = TRUE
    )
  ) %>%
  distinct()
# =========================
# 3. ALTER-LEVEL DATA
# =========================
# Build the alter table from the alter file: rows without an ego key or an
# alter number are dropped, the attributes are recoded, and duplicated rows
# are removed.
alters <- altw1 %>%
  filter(!(is.na(PRIM_KEY) | is.na(NAME_NMBR2))) %>%
  transmute(
    .egoID = as.character(PRIM_KEY),
    .altID = as.integer(NAME_NMBR2),
    # same coding as the ego table: 1 = female (code 2), 0 = male
    gender = if_else(as.numeric(N_GENDER) == 2, 1, 0),
    # family: 1 if any of the five relationship items is "(1) yes".
    # Because `|` propagates NA, an alter with no "yes" and at least one
    # missing item gets NA rather than 0.
    family = if_else(
      C1A_1 == "(1) yes" |
        C1A_3 == "(1) yes" |
        C1A_4 == "(1) yes" |
        C1A_5 == "(1) yes" |
        C1A_6 == "(1) yes",
      1, 0
    ),
    # race homophily: 1 = "(1) yes", 0 = any other non-missing answer
    same_race = if_else(C2G_NSAMERACE == "(1) yes", 1, 0),
    # alter roughly the same age as ego (+/- 6 years): values 0 and 1 are
    # kept, everything else (including missing) becomes NA
    # NOTE(review): if C1C_NSAMEAGE is stored as a factor, as.numeric()
    # yields level positions (1, 2, ...) and never 0 -- confirm its type.
    same_age = if_else(
      as.numeric(C1C_NSAMEAGE) %in% c(0, 1),
      as.numeric(C1C_NSAMEAGE),
      NA_real_
    )
  ) %>%
  distinct()
# =========================
# 4. ALTER-ALTER TIES
# =========================
# Reshape the wide pair columns (N1_N2 ... N4_N5) into one row per ego and
# alter pair, then keep only the pairs that count as a tie.
aaties <- altw1 %>%
  select(PRIM_KEY, all_of(pair_vars)) %>%
  # collapse identical combinations of ego key and pair values
  # (presumably the pair columns repeat on every alter row of an ego)
  distinct() %>%
  pivot_longer(
    cols = all_of(pair_vars),
    names_to = "pair",
    values_to = "relation"
  ) %>%
  filter(!is.na(relation)) %>%
  mutate(
    relation = as.character(relation),
    # tie if alters know each other very well or know a little,
    # i.e. the response label starts with "(1)" or "(2)"
    weight = ifelse(grepl("^\\((1|2)\\)", relation), 1, 0),
    .egoID = as.character(PRIM_KEY),
    # "N<i>_N<j>" -> source alter i, target alter j
    .srcID = as.integer(sub("^N([1-5])_N([1-5])$", "\\1", pair)),
    .tgtID = as.integer(sub("^N([1-5])_N([1-5])$", "\\2", pair))
  ) %>%
  filter(weight == 1) %>%
  select(.egoID, .srcID, .tgtID, weight)

# Keep only alter-alter ties whose two endpoints exist in the alter table.
# distinct() on the key columns guarantees one row per (ego, alter) even if
# the alter table holds several rows for the same alter.
valid_alters <- alters %>%
  distinct(.egoID, .altID)

# semi_join() is a filtering join: it only drops rows of `aaties` and can
# never duplicate them or add columns, unlike inner_join().
aaties <- aaties %>%
  semi_join(valid_alters, by = c(".egoID", ".srcID" = ".altID")) %>%
  semi_join(valid_alters, by = c(".egoID", ".tgtID" = ".altID")) %>%
  # guard against the same tie being listed more than once
  distinct()
# =========================
# 5. BUILD egor OBJECT
# =========================
# Combine the three tables into one egor object. ID.vars names the columns
# that link them: the ego key, the alter key, and the two endpoint columns
# of the alter-alter tie table.
ego_net_w1 <- egor(
  alters = alters,
  egos = egos,
  aaties = aaties,
  ID.vars = list(
    ego = ".egoID",
    alter = ".altID",
    source = ".srcID",
    target = ".tgtID"
  )
)

# Quick look at the combined object and the first rows of its inputs
ego_net_w1
head(egos)
head(alters)
head(aaties)

Appendix A — Constructing the UCNets Ego Network Dataset
In this chapter, we draw on ego network data from the UC Berkeley Social Networks Study (UCNets) (Fischer 2020), a longitudinal study of personal networks in the San Francisco Bay Area conducted between 2015 and 2018. Specifically, we use data from the first wave and select only a subset of variables relevant for the examples presented in Chapter 8. Several of these variables are recoded to create analytically useful ego-, alter-, and alter–alter-level measures.
Because the UCNets data are restricted-access, readers must register and obtain access themselves in order to reproduce the dataset from the original files. Further information about the study is available at the UCNets project website.
The full code used to construct the dataset is provided in this appendix. This code processes the raw survey files and combines them into an egor object, the data structure used by the egor package for ego-centered network analysis. An egor object contains three linked levels of data:
- ego-level attributes (characteristics of the respondents)
- alter-level attributes (characteristics of the individuals named in each respondent’s personal network)
- alter–alter ties (relationships among those alters within each ego network)
To construct the ego-centered network dataset used in Chapter 8, we combine information from two wave-1 files: one containing respondent-level information and one containing alter- and alter–alter-level information. These are then organized into the three components required by the egor package: an ego table, an alter table, and an alter–alter tie table.
A.1 Ego-level variables
The ego table contains one row per respondent and includes the following variables:
- .egoID – respondent identifier derived from PRIM_KEY.
- gender – binary indicator of respondent gender (0 = male, 1 = female), recoded from GENDER.
- year_birth – respondent year of birth (YEAR_PRELOAD).
- age – respondent exact age in years, retained as a numeric variable from AGE_EX.
- married – binary indicator of marital status (1 = married, 0 = otherwise), derived from A2A.
- pets – indicator for whether the respondent has pets in the household (1 = yes, 0 = no), from A13A.
- home_owner – housing tenure indicator (1 = own, 0 = rent/other), from A19A.
- informal_groups – participation in informal social groups (1 = active, 0 = not active), from B3B.
- health – self-rated health retained as an ordered factor (excellent, very_good, good, fair, poor), from G1.
- lose_weight_advice – whether a health professional advised the respondent to lose weight (1 = yes, 0 = no), from H3.
- smoker – cigarette smoking indicator (1 = yes, 0 = no), from H4A.
- drinking_days – number of days per week the respondent drinks alcohol, from H5A. A value of 9 means the respondent does not drink, and a value of 8 means less than once a week.
- marijuana_use – marijuana use during the last year (1 = yes, 0 = no), from H6A.
- extraversion – personality measure combining two items: the item measuring being reserved (J1A) is reverse-coded and averaged with the sociability item (J1B), producing a continuous indicator where higher values indicate a more outgoing disposition.
- education – highest completed level of education retained as an ordered factor, from K1.
- race – categorical race variable based on RACECATS1.
- birth_continent – continent of birth derived from K9A, collapsing individual countries into broader regions (North America, Asia, Europe, Other).
- religion – categorical religious affiliation from K15.
- income – ordered factor representing personal income brackets, from K22A.
A.2 Alter-level variables
The alter table contains one row per alter named by a respondent and includes the following variables:
- .egoID – identifier linking each alter to the respondent who named them (PRIM_KEY).
- .altID – alter identifier within each ego network (NAME_NMBR2).
- gender – alter gender recoded to match the ego coding (0 = male, 1 = female), from N_GENDER.
- family – binary indicator of whether the alter is a family member (1 = family, 0 = non-family). This is constructed from the relationship indicators for spouse/partner, parent, child, sibling, or other relative (C1A_1 and C1A_3–C1A_6).
- same_race – indicator of racial homophily between ego and alter (1 = same race, 0 = different race), derived from C2G_NSAMERACE.
- same_age – indicator of age homophily between ego and alter (1 = alter is approximately the same age as ego, within ±6 years; 0 = otherwise), derived from C1C_NSAMEAGE. Special codes and missing values are treated as missing.
A.3 Alter–alter ties
The alter–alter tie table represents relationships among alters within each ego network. The original data store these relationships in a wide format through the ten pairwise variables N1_N2, N1_N3, N1_N4, N1_N5, N2_N3, N2_N4, N2_N5, N3_N4, N3_N5, and N4_N5. Each variable records how well one pair of alters knows each other.
These variables are reshaped into long format so that each row represents one potential alter–alter tie within one ego network. The pair labels are parsed to create .srcID and .tgtID, identifying the two alters involved. A binary tie variable weight is then created: responses coded as (1) or (2) are treated as a tie, while all other responses are treated as no tie. In substantive terms, this means that alters are considered connected if they know each other either very well or a little. Only ties with weight = 1 are retained in the final alter–alter tie table.
Finally, alter–alter ties are filtered so that both endpoints correspond to valid alters in the alter table. This ensures that the three components remain internally consistent.
A.4 Constructing the egor object
After these recodings, the data are organized into the three linked tables required by egor:
- egos: one row per respondent, with ego-level attributes
- alters: one row per alter, with alter-level attributes
- aaties: one row per alter–alter tie within each ego network
These are then combined into a single egor object using .egoID as the ego identifier, .altID as the alter identifier, and .srcID and .tgtID as the identifiers for alter–alter ties. This structure allows us to analyze ego network composition, homophily, and local network structure within a unified framework.