Philadelphia accounted for its lowest fraction of state turnout
First things first: Philadelphia cast a record 742K votes for President. (Note, this is different from the 749K turnout because 7K voters left President blank. For data availability reasons, I focus on votes cast for topline office.)
View code
library(tidyverse)
cofile_pattern <-"^PA_(Uno|O)fficial_([0-9]{4})_general_results(_[0-9]+)?.CSV"
cofiles <- list.files(
"../../data/pa_election_data/electionreturns.pa.gov/",
pattern=cofile_pattern,
full.names = TRUE
)
res <- vector(mode="list")
for(f in cofiles){
df_co <- readr::read_csv(f) %>%
rename(
county = `County Name`,
office = `Office Name`,
election = `Election Name`,
candidate = `Candidate Name`,
party = `Party Name`,
votes=Votes
) %>%
mutate(county = tolower(county)) %>%
select(county, office, election, candidate, party, votes) %>%
filter(substr(office, 1, 8) %in% c("Presiden", "Governor")) %>%
mutate(year = substr(election, 1, 4))
res[[f]] <- df_co
}
df <- bind_rows(res)
# table(df$year)
asnum <- function(x) as.numeric(as.character(x))
county_results <- df %>%
group_by(county, year) %>%
mutate(
turnout=sum(votes),
pvote = votes / turnout,
cycle=ifelse(asnum(year) %% 4 == 0, "President", "Governor"),
county_group = case_when(
county == "philadelphia" ~ "Philadelphia",
county %in% c("bucks", "delaware", "montgomery", "chester") ~ "Phila Suburbs",
TRUE ~ "Rest of State"
)
) %>%
filter(party %in% c("Democratic", "Republican")) %>%
select(-votes, -candidate) %>%
pivot_wider(names_from = party, values_from=pvote, names_prefix = "pvote_")
ggplot(
county_results %>% filter(county == "philadelphia"),
aes(x=asnum(year), y=turnout)
) +
geom_line(
aes(group=cycle, linetype=cycle),
size=1,
color=strong_blue
) +
geom_point(size=4, color=strong_blue) +
scale_linetype_manual(values=c(President="solid", Governor="dashed"), guide=FALSE) +
geom_text(
data=tribble(
~year, ~turnout, ~label,
"2015", 380e3, "Governor",
"2015", 650e3, "President"
),
aes(label=label),
fontface="bold",
color=strong_blue,
size=4,
hjust=-0.1
) +
theme_sixtysix() +
scale_y_continuous(labels=scales::comma, breaks = seq(0, 700e3, 100e3)) +
expand_limits(y=0) +
labs(
title="Philadelphia cast its most votes in decades",
y="Votes Cast",
x=NULL
)
But the rest of the state grew much, much more.
View code
ggplot(
county_results %>%
filter(year %in% c(2016, 2020)) %>%
mutate(gap = pvote_Democratic - pvote_Republican) %>%
select(county, county_group, year, cycle, turnout, gap) %>%
pivot_longer(c(turnout, gap)) %>%
unite(var, name, year, sep="_") %>%
pivot_wider(names_from=var, values_from=value),
aes(
x=100*(gap_2016 + gap_2020)/2,
y=100*(turnout_2020 - turnout_2016)/turnout_2016,
color=county_group
)
) +
geom_point(aes(size=(turnout_2016 + turnout_2020)/2)) +
geom_text(
data=tribble(
~x, ~y, ~county_group,
20, 20, "Phila Suburbs",
42, 7, "Philadelphia"
),
aes(x=x, y=y, label=county_group),
size=4,
fontface="bold",
hjust=0
) +
scale_color_manual(
values=c("Philadelphia"=strong_blue, "Rest of State" = light_grey, "Phila Suburbs" = strong_green),
guide=FALSE
) +
scale_x_continuous(
"Average of 2016 and 2020 Results",
labels=function(x) sprintf("+%s%% %s", abs(x), ifelse(x >= 0, "Dem", "Rep"))
) +
scale_y_continuous(
"Change in votes cast (as % of 2016)",
labels=scales::comma
) +
scale_size_area(
"Avg(2016 votes cast, 2020 votes cast)",
labels=scales::comma
) +
theme_sixtysix() +
geom_hline(yintercept=0)+
labs(
title="Philadelphia's turnout grew less than the state.",
subtitle="Second lowest growth, higher only than State College's Centre County."
)
The net result is that Philadelphia represented its smallest share of the Presidential vote since at least 2000.
View code
grouped_turnout <- county_results %>%
group_by(year, cycle, county_group) %>%
summarise(turnout=sum(turnout)) %>%
group_by(year) %>%
mutate(frac = turnout / sum(turnout))
# grouped_turnout %>% filter(county_group == "Philadelphia") %>% arrange(frac)
# grouped_turnout %>% filter(county_group == "Phila Suburbs") %>% arrange(frac)
ggplot(
grouped_turnout %>% filter(county_group != "Rest of State"),
aes(x=year, y=100*frac, color=county_group)
) +
geom_line(aes(group=interaction(cycle, county_group), linetype=cycle), size=1) +
geom_point(size=4) +
scale_linetype_manual(values=c(President="solid", Governor="dashed")) +
scale_color_manual(
values=c("Philadelphia"=strong_blue, "Rest of State" = strong_red, "Phila Suburbs" = strong_green),
guide=FALSE
) +
geom_text(
data=tribble(
~year, ~frac, ~county_group,
"2016", 0.13, "Philadelphia",
"2016", 0.20, "Phila Suburbs"
),
aes(label=county_group),
hjust=0,
fontface="bold"
) +
theme_sixtysix() +
# theme(title=element_text(size=8)) +
expand_limits(y=0) +
labs(
title="Philadelphia and Suburbs' vote share",
subtitle="The city accounted for its lowest share of presidential votes since at least 2000.",
x=NULL,
y="Percent of PA's Votes Cast",
linetype="Cycle"
)
Perhaps more surprising than turnout, though, was that Philadelphia’s percent for Trump grew from four years ago. That was only true of a few other counties, all really Republican.
View code
ggplot(
county_results %>%
filter(year %in% c(2016, 2020)) %>%
mutate(gap = pvote_Democratic - pvote_Republican) %>%
select(county, county_group, year, cycle, turnout, gap) %>%
pivot_longer(c(turnout, gap)) %>%
unite(var, name, year, sep="_") %>%
pivot_wider(names_from=var, values_from=value),
aes(
size=(turnout_2016 + turnout_2020)/2,
x=100*gap_2016,
y=100*(gap_2020 - gap_2016),
color=county_group
)
) +
geom_point() +
scale_color_manual(
values=c("Philadelphia"=strong_blue, "Rest of State" = light_grey, "Phila Suburbs" = strong_green),
guide=FALSE
) +
geom_text(
data=tribble(
~x, ~y, ~county_group,
20, 6, "Phila Suburbs",
42, -3, "Philadelphia"
),
aes(x=x, y=y, label=county_group),
size=4,
fontface="bold",
hjust=0
) +
scale_x_continuous(
"2016 Result",
labels=function(x) sprintf("+%s%% %s", abs(x), ifelse(x >= 0, "Clinton", "Trump"))
) +
scale_y_continuous(
"2020 Biden vs 2016 Clinton",
labels=function(x) sprintf("+%s%% %s", abs(x), ifelse(x > 0, "Biden", "Clinton"))
) +
scale_size_area(
"Avg(2016 turnout, 2020 turnout)",
labels=scales::comma
) +
theme_sixtysix() +
geom_hline(yintercept=0)+
labs(
title="Biden won less of Philadelphia than Clinton",
subtitle="Change in county preferences, 2016 to 2020."
)
There’s tension between the plots above and the fact that Biden would not have won PA without Philadelphia. If you delete Philadelphia from the state, Trump wins handily. People who worked hard to make the city’s record turnout happen can feel unappreciated by pieces like this. And it’s not obvious what the counterfactual is: without their hard work, would Philadelphia’s turnout have actually been down? But the following is clear: if you want to know why Trump won PA four years ago and lost it this year, the answer is not “Philadelphia changed”. In fact, changes in Philadelphia swung towards Trump. Nate Cohn has a great thread on this tension.
More importantly, talking about Black voters’ “flat” turnout or Hispanic voters’ shift towards Trump ignores the fact that they voted more overwhelmingly for Biden than White voters did, and have carried the party for decades. It feels pejorative of groups that have long been the party’s most steadfast base of support. And it feels especially callous after a year of Covid and police violence hit Black communities the hardest. I point out below that turnout was flat, but it’s important to make clear that I haven’t done any of the necessary reporting to understand *why*.
To be clear, Philadelphia’s Black wards voted for Biden at 95%. And if we extend that to Black voters in other wards, Black voters probably account for more than half of Biden’s Philadelphia votes.
Patterns within the city
Within the city, there are five clear groups of Divisions: Wealthy Progressive divisions that turned out in droves, Trumpy Divisions that did too, Black Divisions where turnout was flat, Hispanic Divisions where turnout fell and preferences moved towards Trump, and student Divisions where turnout cratered.
View code
library(leaflet)
make_leaflet <- function(
data,
get_color,
title,
is_percent=FALSE,
is_race=FALSE, #if is_race, color should be % Dem - % Rep
diverge_at_zero=FALSE,
zoom=6
){
color <- get_color(data)
vals <- cut_vals(color, is_race, diverge_at_zero)
legend_list <- create_legend(
vals$min,
vals$max,
vals$step_size,
vals$pal,
is_percent,
is_race,
diverge_at_zero
)
init_map(data, zoom) %>%
addPolygons(
data=data$geometry,
weight=0,
color="white",
opacity=1,
fillOpacity = 0.8,
smoothFactor = 0,
fillColor = vals$pal(color),
popup=data$popup
) %>%
addControl(title, position="topright", layerId="map_title") %>%
addLegend(
layerId="geom_legend",
position="bottomright",
colors=legend_list$colors,
labels=legend_list$labels
)
}
init_map <- function(data, zoom){
bbox <- st_bbox(data)
leaflet(
options=leafletOptions(
minZoom=zoom,
# maxZoom=zoom,
zoomControl=TRUE,
dragging=TRUE
)
)%>%
setView(
zoom=zoom,
lng=mean(bbox[c(1,3)]),
lat=mean(bbox[c(2,4)])
) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
setMaxBounds(
lng1=bbox["xmin"],
lng2=bbox["xmax"],
lat1=bbox["ymin"],
lat2=bbox["ymax"]
)
}
create_legend <- function(
min_value,
max_value,
step_size,
pal,
is_percent,
is_race,
diverge_at_zero
){
legend_values <- seq(min_value, max_value, step_size)
legend_colors <- pal(legend_values)
if(is_race & is_percent){
legend_labels <- sprintf(
"+%s%% %s",
abs(legend_values),
case_when(
legend_values==0 ~ "",
legend_values > 0 ~ "Dem",
legend_values < 0 ~ "Rep"
)
)
}else if(is_percent){
legend_labels <- sprintf("%s%%", legend_values)
} else if(is_race){
legend_labels <- sprintf(
"+%s %s",
comma(abs(legend_values)),
case_when(
legend_values==0 ~ "",
legend_values > 0 ~ "Dem",
legend_values < 0 ~ "Rep"
)
)
} else {
legend_labels <- scales::comma(legend_values)
}
list(
colors=legend_colors,
values=legend_values,
labels=legend_labels
)
}
cut_vals <- function(x, is_race, diverge_at_zero){
min_value <- min(x, na.rm=TRUE)
max_value <- max(x, na.rm=TRUE)
if(diverge_at_zero) {
max_value <- max(abs(min_value), abs(max_value))
min_value <- -max_value
}
sigfig <- round(log10(max_value-min_value)) - 1
step_size <- 2 * 10^(sigfig)
min_value <- step_size * floor(min_value / step_size)
max_value <- step_size * ceiling(max_value / step_size)
if(!is_race & !diverge_at_zero){
pal <- colorNumeric(
"viridis",
domain=c(min_value, max_value)
)
} else if(is_race){
pal <- colorNumeric(
c(strong_red, "grey95", strong_blue),
domain=c(min_value, max_value)
)
} else {
pal <- colorNumeric(
c(strong_orange, "grey95", strong_purple),
domain=c(min_value, max_value)
)
}
list(
min=min_value,
max=max_value,
step_size=step_size,
pal=pal
)
}
make_leaflet_circles <- function(
data,
get_color,
get_radius,
title,
is_percent=FALSE,
is_race=FALSE, #if is_race, should be % Dem - % Rep
diverge_at_zero=FALSE,
zoom=6
){
radius <- get_radius(data)
max_radius <- 20
radius <- max_radius * radius / max(radius)
data <- data[order(radius, decreasing=TRUE),]
radius <- radius[order(radius, decreasing=TRUE)]
color <- get_color(data)
vals <- cut_vals(color, is_race, diverge_at_zero)
legend_list <- create_legend(
vals$min,
vals$max,
vals$step_size,
vals$pal,
is_percent,
is_race
)
init_map(data, zoom) %>%
addCircleMarkers(
lat=asnum(data$INTPTLAT),
lng=asnum(data$INTPTLON),
radius=radius,
# weight=0,
stroke=FALSE,
color=vals$pal(color),
opacity=1,
fillOpacity = 1,
# smoothFactor = 0,
fillColor = vals$pal(color),
popup=data$popup
) %>%
addControl(title, position="topright", layerId="map_title") %>%
addLegend(
layerId="geom_legend",
position="bottomright",
colors=legend_list$colors,
labels=legend_list$labels
)
}
pretty_time <- function(time){
gsub("^0", "", format(time, "%I:%M %p"))
}
Philadelphia was still overwhelmingly Democratic, casting 81% of its votes for Biden to 18% for Trump.
View code
wards <- st_read("../../data/gis/warddivs/201911/Political_Wards.shp", quiet=TRUE) %>%
mutate(ward=sprintf("%02d", asnum(WARD_NUM)))
phila_res <- readRDS("../election_night_2020/tmp/tmp_phila_res.RDS")
phila_res %<>%
group_by(ward) %>%
mutate(turnout=sum(votes)) %>%
ungroup() %>%
pivot_wider(names_from=party, values_from=votes, names_prefix = "votes_")
phila_res_16 <- readr::read_csv("../../data/raw_election_data/2016_general.csv") %>%
filter(OFFICE == "PRESIDENT AND VICE PRESIDENT OF THE UNITED STATES") %>%
mutate(ward=sprintf("%02d", WARD)) %>%
mutate(party=case_when(
PARTY == "DEMOCRATIC" ~ "D",
PARTY == "REPUBLICAN" ~ "R",
TRUE ~ "O"
)) %>%
group_by(ward, party) %>%
summarise(votes=sum(VOTES)) %>%
group_by(ward) %>%
mutate(turnout_16 = sum(votes)) %>%
pivot_wider(names_from=party, values_from=votes, names_prefix = "votes_16_")
phila_df <- read.csv("../election_night_2020/tmp/mailin_phila.csv") %>%
mutate(ward=substr(warddiv, 1, 2)) %>%
select(-warddiv, -turnout_16) %>%
group_by(ward) %>%
summarise_all(sum)
wards %<>%
left_join(phila_res, by="ward") %>%
left_join(phila_df, by="ward") %>%
left_join(phila_res_16)
if(nrow(wards) != 66) stop()
library(scales)
wards %<>%
mutate(
popup=sprintf(
paste(
c(
"<b>Ward %s</b>",
"Total Votes Counted: %s",
"2016 Votes Cast: %s",
"Change: %s",
"Active Registered Voters: %s",
"Turnout as %% of RVs: %0.0f%%",
"",
"Biden: %s (%0.0f%%)",
"Trump: %s (%0.0f%%)",
"",
"Clinton 2016: %s (%0.0f%%)",
"Trump 2016: %s (%0.0f%%)"
),
collapse = "<br>"
),
WARD_NUM,
comma(turnout),
comma(turnout_16),
sprintf(
"%s%0.0f%%",
ifelse(turnout > turnout_16, "+", "-"),
abs(100 * (turnout - turnout_16) / turnout_16)
),
comma(n_reg),
100 * turnout / n_reg,
comma(votes_D), 100*votes_D / turnout,
comma(votes_R), 100*votes_R / turnout,
comma(votes_16_D), 100*votes_16_D / turnout_16,
comma(votes_16_R), 100*votes_16_R / turnout_16
)
)
render_iframe <- function(widget, file=NULL){
DIR <- "leaflet_files"
if(!dir.exists(DIR)) dir.create(DIR)
if(is.null(file)){
obj.name <- deparse(substitute(widget))
file <- sprintf("%s.html", obj.name)
}
setwd(DIR) # saveWidget can't save to a folder
htmlwidgets::saveWidget(
widget,
file=file,
selfcontained=TRUE
)
setwd("..")
sprintf(
'<iframe src="%s/%s" width="100%%" height="600" scrolling="no" frameborder="0"></iframe>',
DIR, file
)
}
lf_res <- make_leaflet(
data=wards,
get_color=function(df) 100*(df$votes_D - df$votes_R)/df$turnout,
is_percent = TRUE,
title=sprintf("Presidential results"),
is_race=TRUE,
diverge_at_zero = TRUE,
zoom=11
)
Turnout rose sharply in Center City and Fishtown, rose broadly across the Northeast, was flat in West Philly, fell in North Philly, and cratered in the wards around Penn, Drexel, and Temple.
View code
lf_turnout <- make_leaflet(
data=wards,
get_color=function(df) 100*(df$turnout - df$turnout_16)/df$turnout_16,
is_percent = TRUE,
title=sprintf("Change in votes cast from 2016"),
is_race=FALSE,
diverge_at_zero = TRUE,
zoom=11
)
The correlation with demographics is striking. Merging in and crosswalking 2018 ACS estimates, we see that turnout was down sharply in Hispanic wards and flat in Black wards (in an election where the state and rest of the city was sharply up).
View code
pops <- read.csv(
"../../data/census/acs_2018_5yr_age_phila/ACSST5Y2018.S1501_data_with_overlays_2020-11-08T110945.csv",
skip = 1
)
pops <- pops %>%
rename(
pop_1824 = Estimate..Total..Population.18.to.24.years,
pop_25over = Estimate..Total..Population.25.years.and.over,
somecol_1824=Estimate..Total..Population.18.to.24.years..Some.college.or.associate.s.degree,
colplus_1824 = Estimate..Total..Population.18.to.24.years..Bachelor.s.degree.or.higher
) %>%
dplyr::select(id, Geographic.Area.Name,pop_25over, pop_1824, somecol_1824, colplus_1824)
crosswalk <- readRDS("../../data/gis_crosswalks/bgs10_to_divs_201911.Rds")
crosswalk %<>% mutate(tract=substr(bg_fips, 1, 11)) %>%
group_by(tract, WARD, DIV) %>%
summarise(pop=sum(pop10), weight=sum(weight))
div_pops <- pops %>%
mutate(tract=gsub("^1400000US", "", id)) %>%
left_join(crosswalk, by = "tract") %>%
group_by(WARD, DIV) %>%
summarise(
pop_25over=sum(pop_25over*weight),
pop_1824=sum(pop_1824*weight),
somecol_1824 =sum(somecol_1824 *weight),
colplus_1824=sum(colplus_1824*weight)
)
ward_pops <- div_pops %>% group_by(WARD) %>%
summarise_at(vars(pop_25over:colplus_1824), sum) %>%
mutate(
p_1824_col=(somecol_1824 + colplus_1824)/(pop_25over + pop_1824)
)
wards <- wards %>%
left_join(ward_pops %>% rename(ward=WARD))
pops_race <- read.csv(
"../../data/census/acs_2018_5yr_agerace_phila/ACSDP5Y2018.DP05_data_with_overlays_2020-11-08T121412.csv",
skip=1
)
pops_race <- pops_race %>%
rename(
pop_total=Estimate..HISPANIC.OR.LATINO.AND.RACE..Total.population,
pop_hisp=Estimate..HISPANIC.OR.LATINO.AND.RACE..Total.population..Hispanic.or.Latino..of.any.race.,
pop_nhwhite=Estimate..HISPANIC.OR.LATINO.AND.RACE..Total.population..Not.Hispanic.or.Latino..White.alone,
pop_nhblack=Estimate..HISPANIC.OR.LATINO.AND.RACE..Total.population..Not.Hispanic.or.Latino..Black.or.African.American.alone
) %>%
select(id, Geographic.Area.Name, pop_total, pop_hisp, pop_nhwhite, pop_nhblack)
div_race <- pops_race %>%
mutate(tract=gsub("^1400000US", "", id)) %>%
left_join(crosswalk, by = "tract") %>%
group_by(WARD, DIV) %>%
summarise(
pop_total=sum(pop_total*weight),
pop_hisp=sum(pop_hisp*weight),
pop_nhwhite =sum(pop_nhwhite *weight),
pop_nhblack=sum(pop_nhblack*weight)
)
ward_race <- div_race %>% group_by(WARD) %>%
summarise_at(vars(pop_total:pop_nhblack), sum)
wards <- wards %>% left_join(ward_race %>% rename(ward=WARD))
df_race <- wards %>%
as.data.frame() %>%
pivot_longer(
cols = c(pop_hisp, pop_nhwhite, pop_nhblack),
names_to="race",
values_to="pop"
) %>%
mutate(prace = pop / pop_total)%>%
mutate(race_raw=gsub("pop_", "", race)) %>%
mutate(
race_formatted=case_when(
race_raw=="nhwhite" ~ "Non-Hispanic White",
race_raw=="nhblack" ~ "Non-Hispanic Black",
race_raw=="hisp" ~ "Hispanic",
TRUE ~ "NA"
)
)
ggplot(
df_race ,
aes(x=prace, y=turnout / turnout_16)
) +
geom_hline(yintercept=1) +
geom_point(aes(size=turnout_16, color=race_raw), alpha=0.8, pch=16) +
facet_grid(race_formatted ~ .) +
theme_sixtysix() +
scale_color_manual(
values=c(
nhblack=strong_blue,
nhwhite=strong_red,
hisp=strong_orange
),
guide=FALSE
) +
labs(
x="Proportion of Population",
y="Votes cast 2020 / Votes cast in 2016",
title="Votes are down in Hispanic wards, flat in Black wards",
subtitle=sprintf("Dots are wards, each ward shows up in each facet. Recorded votes as of %s.", format(Sys.time(), "%m/%d %H:%M")),
size="2016 votes cast"
)
Turnout was also way down in the student- and recent-grad-heavy wards. Presumably, these young voters just voted from their parents’ house, thanks to Covid. It may be a wash at the state level, though we certainly lost some out-of-state strategic swing voters, and it overall makes Philadelphia look disproportionately low.
How much better would the city’s turnout look with the students added back in? If we think they’re worth 30K votes, that would put Philadelphia still at the bottom of the pack, but not an outlier. I’ll dig into these results more when the individual-level voter file is updated.
View code
ggplot(
wards,
aes(x=p_1824_col, y=turnout/turnout_16)
) +
geom_hline(yintercept=1) +
geom_point(
aes(size=turnout_16),
alpha=0.8,
color=strong_grey,
pch=16
) +
scale_size_area() +
theme_sixtysix() +
labs(
x="Proportion of over-18 pop that is 18-24 and has at least some college",
y="Votes cast 2020 / Votes cast in 2016",
title="Votes are down in young, educated wards",
size="2016 votes cast"
)
North Philly’s Hispanic wards not only turned out less, but shifted their preferences towards Trump. In 2016, Clinton won the 7th by 89 percentage points, Biden only won by 67. Meanwhile, Manayunk is the only neighborhood with a sizeable swing towards Biden.
View code
lf_pref <- make_leaflet(
data=wards,
get_color=function(df) 100*((df$votes_D - df$votes_R)/df$turnout - (df$votes_16_D - df$votes_16_R)/df$turnout_16),
is_percent = TRUE,
title=sprintf("Change in %% gap vs 2016."),
is_race=TRUE,
diverge_at_zero = TRUE,
zoom=11
)
The result is a net change in the overall vote gap of +471K for Biden, down from +475 for Clinton in 2016. Again, not a huge change, but not in the direction that would explain Biden’s win. Turnout changes in Center City and Manayunk drove the largest increase in the gap for Biden, offset by shrinking gaps nearly everywhere else.
View code
lf_gap <- make_leaflet(
data=wards,
get_color=function(df) ((df$votes_D - df$votes_R) - (df$votes_16_D - df$votes_16_R)),
is_percent = FALSE,
title=sprintf("Change in vote gap vs 2016 (combined turnout and preferences)."),
is_race=TRUE,
diverge_at_zero = TRUE,
zoom=11
)