Mail-In Votes, 2021 Primary

With a tense race for District Attorney and the typically-packed race for Common Pleas, we’re all looking for any signals we can get.

The friendly folks at the Commissioners’ Office have shared mail-in ballot requests with me, so let’s dig in.

View code
library(dplyr)
library(tidyr)
library(ggplot2)
library(sf)
library(scales)

devtools::load_all("../../admin_scripts/sixtysix/")

mail <- readxl::read_xlsx(
  "../../data/raw_election_data/Vote by Mail Listing (5-11-21).xlsx",
  sheet = "Voter Listing"
)

dup_ids <- mail$`ID Number`[duplicated(mail$`ID Number`)] %>% unique()

mail_dup <- mail %>% filter(`ID Number` %in% dup_ids)
mail <- mail %>% filter(!`ID Number` %in% dup_ids)

mail_dup <- mail_dup %>%
    group_by(`ID Number`) %>%
    summarise(
      across(
        c(AppReturnedDate, BallotSent, BallotReturned),
        function(var){
          if(all(is.na(var))) return(NA)
          max(var, na.rm=T)
        }
      ),
      PartyDesc=ifelse(any(PartyDesc=="DEMOCRATIC"), "DEMOCRATIC", PartyDesc[1])
    )

mail <- bind_rows(mail, mail_dup)

df_major <- readRDS("../../data/processed_data/df_major_type_20210118.Rds")
head(df_major)
n_mail <- mail %>% 
  filter(PartyDesc=="DEMOCRATIC") %>% 
  mutate(ret = !is.na(BallotReturned)) %>%
  group_by(ret) %>% 
  count()

Some 78,178 Democratic voters requested ballots, and 35,802 of them had already been returned as of May 11th. That suggests we’ll get close to the 155,000 votes cast in 2017.

View code
turnout <- df_major %>% 
  filter(is_topline_office) %>%
  filter(election_type=="primary", party=="DEMOCRATIC") %>%
  group_by(year, ward, type) %>%
  summarise(votes=sum(votes)) %>%
  group_by(year, ward) %>%
  mutate(total=sum(votes)) %>%
  ungroup() %>%
  pivot_wider(names_from="type", values_from="votes", values_fill = list(votes=0))

cycle <- function(year){
  year <- asnum(year)
  cycle_num <- year %% 4
  c("President", "District Attorney", "Governor", "Mayor")[cycle_num+1]
}

ggplot(
  turnout %>% group_by(year) %>%
    summarise(across(A:total, sum)) %>%
    mutate(
      cycle=cycle(year)
    ),
  aes(x=asnum(year), y=total, color=cycle)
) +
  geom_line(aes(group=cycle), size=2) +
  scale_color_manual(
    values=with(
      colors_sixtysix(), 
      c(
        President = light_red, `District Attorney`=light_green, 
        Governor = light_orange, Mayor = light_blue,
        "2021 Mail-In\nRequests"=light_green
      )
    ),
    guide=FALSE
  ) +
  theme_sixtysix() +
  expand_limits(y=0) +
  scale_y_continuous(labels=scales::comma) +
  geom_point(
    data=data.frame(year=2021, total=sum(n_mail$n), cycle="District Attorney"),
    size=4
  ) +
  geom_text(
    data=tribble(
      ~year, ~cycle, ~total,
      2016, "President", 370e3,
      2015.5, "Mayor", 250e3,
      2015.2, "Governor", 190e3,
      2012, "District Attorney", 40e3,
      2018.7, "2021 Mail-In\nRequests", 40e3
    ),
    hjust=0,
    size=4,
    aes(label=cycle)
  )+
  labs(
    title="Votes in Democratic Primaries",
    y="Votes cast for topline office",
    x=NULL,
    fill=NULL
  )

But there are big unknowns in what mail-in requests tell us about overall turnout. We’ve only had two elections with no-excuse mail-in voting, and this might be the first where we’ll see close to “normal” mail-in use. For last year’s elections, we were both at the height of the pandemic and we saw mail-ins became politically polarized. In this election I expect mail-in usage to be much more a function of convenience to individual voters. That would mean ward counts will be closer to proportional to overall turnout, but will still deviate in ways that we don’t today know; there’s no past data.

View code
library(leaflet)

make_leaflet <- function(
  df, 
  title=NULL, 
  fill_col="fill",
  popup_col="popup",
  zoom=11
){
  
  pal <- cut_vals(df[[fill_col]])
  
  legend <- create_legend(
    pal$min, 
    pal$max, 
    pal$step_size,
    pal$pal
  )
  
  init_map(df, zoom) %>%
    addPolygons(
      data=df$geometry,
      weight=0, 
      color="white", 
      opacity=1, 
      fillOpacity = 0.8, 
      smoothFactor = 0,
      fillColor = pal$pal(df[[fill_col]]),
      popup=df[[popup_col]]
    ) %>%
    addControl(title, position="topright", layerId="map_title")  %>%
    addLegend(
      layerId="geom_legend",
      position="bottomright",
      colors=legend$colors,
      labels=legend$labels
    )
}

init_map <- function(data, zoom){
  bbox <- st_bbox(data)
  
  leaflet(
    options=leafletOptions(
      minZoom=zoom,
      maxZoom=zoom,
      zoomControl=TRUE,
      dragging=FALSE
      )
  )%>% 
    setView(
      zoom=zoom,
      lng=mean(bbox[c(1,3)]),
      lat=mean(bbox[c(2,4)])
    ) %>%
    addProviderTiles(providers$Stamen.TonerLite) %>%
    setMaxBounds(
      lng1=bbox["xmin"],
      lng2=bbox["xmax"],
      lat1=bbox["ymin"],
      lat2=bbox["ymax"]
    )
}

cut_vals <- function(x){
  min_value <- min(x, na.rm=TRUE)
  max_value <- max(x, na.rm=TRUE)
  
  sigfig <- round(log10(max_value-min_value)) - 1
  n_steps <- (max_value-min_value)/10^sigfig
  step_size <- ifelse(n_steps >= 10, 2, 1) * 10^(sigfig)
  
  min_value <- step_size * floor(min_value / step_size)
  max_value <- step_size * ceiling(max_value / step_size)
  
  pal <- colorNumeric(
    "viridis", 
    domain=c(min_value, max_value)
  )

  list(
    min=min_value,
    max=max_value,
    step_size=step_size,
    pal=pal
  )
}

create_legend <- function(
  min_value, 
  max_value, 
  step_size,
  pal
){ 
  legend_values <- seq(min_value, max_value, step_size)
  legend_colors <- pal(legend_values)
  legend_labels <- scales::comma(legend_values,accuracy=step_size)
 
  list(
    colors=legend_colors,
    values=legend_values,
    labels=legend_labels
  )
}
View code
divs <- st_read("../../data/gis/warddivs/202011/Political_Divisions.shp", quiet=T) %>%
  mutate(warddiv = sprintf("%s-%s", substr(DIVISION_N,1,2), substr(DIVISION_N, 3, 4)))

wards <- st_read("../../data/gis/warddivs/201911/Political_Wards.shp", quiet=T) %>%
  mutate(ward = sprintf("%02d", asnum(WARD_NUM)))

mail_div <- mail %>%
  filter(PartyDesc=="DEMOCRATIC") %>%
  mutate(warddiv = paste0(Ward, "-", Division)) %>%
  group_by(warddiv) %>%
  summarise(
    apps = sum(!is.na(AppReturnedDate)),
    ballots = sum(!is.na(BallotSent)),
    returned = sum(!is.na(BallotReturned))
  )

mail_ward <- mail_div %>%
  mutate(ward = substr(warddiv, 1, 2)) %>%
  group_by(ward) %>%
  summarise(across(apps:returned, sum))

turnout_2020_gen <- df_major %>%
  filter(year==2020, election_type=="general", is_topline_office) %>%
  group_by(ward, type) %>%
  summarise(votes=sum(votes)) %>%
  group_by(ward) %>%
  mutate(total=sum(votes)) %>%
  ungroup() %>%
  pivot_wider(names_from=type, values_from=votes)
  
wards_lf <- wards %>%
  left_join(mail_ward) %>%
  left_join(turnout %>% filter(year==2017)) %>%
  left_join(turnout_2020_gen, by="ward", suffix=c("", ".20")) %>%
  mutate(
    popup=sprintf(
      paste(
        c(
          "<b>Ward %s</b>",
          "Mail-In Requests (Dem): %s",
          "Mail-In Returns (Dem): %s",
          "Votes 2017 (Dem): %s",
          "Mail-In Votes Nov 2020: %s"
        ),
        collapse = "<br>"
      ),
      ward,
      comma(apps),
      comma(returned),
      comma(total),
      comma(M.20)
    )
  )

mail_lf <- make_leaflet(wards_lf, title="Mail-In Requests", fill_col="apps")

Mail-in applications are about 30% of what we saw in November, with North Philly and the Universities significantly lagging (which is typical for turnout in Municipal versus Presidential elections).

View code
prop_lf <- wards_lf %>%
  mutate(fill=apps / M.20) %>%
  make_leaflet(title="Mail-In Requests, May 2021 / Nov 2020")
View code
library(lubridate)
mail_time <- mail %>%
  mutate(
    app = mdy(BallotSent),
    ret = mdy(BallotReturned)
  ) %>%
  select(app, ret) %>%
  pivot_longer(cols=c(app,ret), values_to = "date") %>%
  filter(!is.na(date)) %>%
  group_by(name, date) %>%
  count()

dates <- seq(from=min(mail_time$date), to=max(mail_time$date), by="days")

mail_time <- as.data.frame(expand_grid(date=dates, name=c("app", "ret"))) %>%
  left_join(mail_time) %>%
  group_by(name) %>%
  arrange(date) %>%
  mutate(n = ifelse(is.na(n), 0, n)) %>%
  mutate(cum = cumsum(n))


ggplot(
  mail_time %>% mutate(var=c(app="Ballots Sent", ret="Ballots Returned")[name]),
  aes(x=date, y=cum, color=var)
) +
  geom_line(aes(group=var), size=2) +
  theme_sixtysix() +
  scale_x_date(limits=c(ymd("2021-04-15"), max(mail_time$date))) +
  scale_y_continuous(labels=scales::comma) +
  scale_color_manual(
    values=c(
      `Ballots Sent`=colors_sixtysix()$light_red, 
      `Ballots Returned`=colors_sixtysix()$strong_grey
    ),
    guide=FALSE
  ) +
  labs(
    y=NULL,
    x=NULL,
    title="Ballot Returns as of May 11th"
  ) +
  annotate(
    "text",
    x=ymd("2021-05-04"),
    y=c(20e3, 72e3),
    label=c("Ballots Returned", "Ballots Sent"),
    hjust=0,
    size=4,
    fontface="bold",
    color=colors_sixtysix()[c("strong_grey", "light_red")]
  )

Party Switches

In my post on party changes, I assumed the changes were strategic: Republicans switching to Democrats so they could vote in the closed Primary. But maybe those were just Republicans who were finally fed up with the party after January 6th?

Comparing Philadelphia to other counties provides strong evidence that the switches are, in fact, strategic. If the changes were due to distaste with the party, we might expect Philadelphia’s party switches to look like other counties. And in fact the rate of Republicans switching to a third party or independent looks the same in Philadelphia as the rest of the state. But the rate of Republicans switching to Democrats is more than five times as high in Philadelphia as any other county, including the Philadelphia suburbs and Allegheny.

View code
fve_names <- read.csv("../../data/voter_registration/col_names.csv")

read_fve <- function(ds, county="PHILADELPHIA"){
  path <- sprintf(
    "../../data/voter_registration/%1$s/%2$s FVE %1$s.txt", 
    ds, toupper(county)
  )
  fve <- readr::read_tsv(
    path,
    col_types = paste0(rep("c", nrow(fve_names)), collapse=""), 
    col_names = as.character(fve_names$ï..name)
  ) %>%
    mutate(    
      party = case_when(
        `Party Code` %in% c("D", "R", NA) ~ as.character(`Party Code`), 
        TRUE~"Oth"
      ),
    )
  fve
}

lagged_fve <- function(first, second, county="PHILADELPHIA"){
  first <- read_fve(first, county)
  second <- read_fve(second, county)
  second <- second %>%
    left_join(
      first %>% select(`ID Number`, party, `Street Name`),
      by="ID Number",
      suffix=c(".1", ".0")
    ) %>%
    mutate(
      party_switch = paste0(party.0, "_", party.1),
      ward =  substr(`Precinct Code`, 1, 2),
      warddiv = paste0(ward, "-", substr(`Precinct Code`, 3, 4)),
      county=county
    )
  second
}

if(FALSE){
  fve_21 <- lagged_fve("20201019", "20210510")
  saveRDS(fve_21, "fve_21.RDS")  
} else {
  fve_21 <- readRDS("fve_21.RDS")
}

# fve_21 %>% 
#   filter(!is.na(`Election 3 Vote Method`)) %>%
#   filter(party_switch %in% c("D_D", "R_D")) %>%
#   left_join(mail, by="ID Number") %>%
#   mutate(app=!is.na(AppReturnedDate), ret=!is.na(BallotReturned)) %>%
#   group_by(party_switch, app, ret) %>%
#   count()
# 
# fve_21 %>% 
#   filter(party_switch %in% c("D_D", "R_D")) %>%
#   left_join(mail, by="ID Number") %>%
#   mutate(app=!is.na(AppReturnedDate), ret=!is.na(BallotReturned)) %>%
#   group_by(party_switch, app, ret) %>%
#   count()

How about mail-in application rates among them? Of the 6,384 Republican to Democrat switchers, 672 have requested a mail-in ballot and 300 have returned it. That rate is higher than among Democrat registrants who stayed Democrats (77K requests out of 784K voters), but it’s lower than among the subset of those stably-Democrat registrants who also voted in November 2020 (75K requests out of 567K voters). So these party-switchers are not particularly likely to request mail-in ballots. But they also may just be planning to vote in person.

Krasner v. Vega

I’ve been trying to figure out what to write on the District Attorney race for a bit. It’s really, really hard for me to imagine Krasner losing, if only because incumbents in Philadelphia rarely lose, and the only recent examples of it used an entirely different lane than Vega is attempting.

The best way to write this post would be to use a survey. But I don’t have one, so instead I’ll look at some past elections to understand if Vega winning is just unlikely or near impossible. This type of analysis has led me wrong before (though, to this day, I maintain that I wasn’t that wrong if you read the conclusion).

The last time an incumbent DA lost was when Ed Rendell beat Emmett Fitzpatrick in the 1977 Democratic Primary. Since then, we’ve had five DAs–Rendell, Castille, Abraham, Williams, Krasner–and the only times an incumbent was replaced were when they declined to run, either seeking other political office or with legal trouble. The closest Lynne Abraham came to losing in her four reelections was a 56-44 win over Seth Williams in 2005.

View code
library(dplyr)
library(tidyr)
library(ggplot2)

devtools::load_all("../../admin_scripts/sixtysix/")
df_major <- readRDS("../../data/processed_data/df_major_type_20210118.Rds")

da_res <- df_major %>% 
  filter(
    election_type=="primary", party=="DEMOCRATIC", office=="DISTRICT ATTORNEY"
  ) %>%
  group_by(candidate, office, year) %>%
  summarise(votes=sum(votes)) %>%
  arrange(year, desc(votes))

However, we have seen challengers beat incumbents in two recent races for other offices. Kendra Brooks won an At Large Council seat from Republican Al Taubenberger in 2019, and Rebecca Rhynhart won the 2017 race for City Controller from Alan Butkovitz. The problem for Vega is that they both did that by dominating the Wealthy Progressive divisions: Center City and the ring around it, coupled with Chestnut Hill and Mount Airy. Vega is attempting to do the opposite.

View code
library(sf)
divs <- st_read("../../data/gis/warddivs/202011/Political_Divisions.shp") %>%
  mutate( warddiv = pretty_div(as.character(DIVISION_N)))cands <- tribble(
  ~candidate, ~election_type, ~year, 
  "KENDRA BROOKS", "general", 2019,
  "REBECCA RHYNHART", "primary", 2017
)

cand_df <- df_major %>% filter(
  (office == "COUNCIL AT LARGE" & year == 2019 & election_type == "general") |
    (office == "CITY CONTROLLER" & year == 2017 & election_type == "primary" & party == "DEMOCRATIC")
) %>%
  group_by(candidate, office, year, warddiv) %>%
  summarise(votes = sum(votes)) %>%
  group_by(office, year, warddiv) %>%
  mutate(total_votes = sum(votes)) %>%
  ungroup() %>%
  filter(candidate %in% c("KENDRA BROOKS", "REBECCA RHYNHART")) %>%
  mutate(pvote = 100 * votes / total_votes) %>%
  group_by(candidate) %>%
  mutate(
    overall_pvote = weighted.mean(pvote, total_votes),
    pvote_norm = pvote / overall_pvote
  )

map_cand <- function(cand, year){
  ggplot(
    divs %>% left_join(cand_df) %>% filter(candidate == cand)
  ) + 
    geom_sf(
      aes(fill=pvote),
      color=NA
    ) +
    scale_fill_viridis_c() +
    theme_map_sixtysix() +
    labs(
      title = sprintf("Results for %s, %s", format_name(cand), year),
      fill = "% of Vote"
    )
}

map_cand("KENDRA BROOKS", 2019)

View code
map_cand("REBECCA RHYNHART", 2017)

Vega’s path is particularly hard because of the shifting geography of votes in the city. With Philadelphia’s changing voting blocs, Black Voter divisions now cast about 45% of the votes in Democratic Primaries, and Wealthy Progressives over 30%. White Moderate divisions (South Philly and the Northeast) and Hispanic Voter divisions have declining vote shares: together they today constitute about 26% of the votes in primaries, down from over 40% in the early 2000s. These numbers could change in this Tuesday’s election if Vega has successfully mobilized those divisions, but probably not enough to by itself swing the election.

Instead, Vega will need to be competitive in the Black Voter and Wealthy Progressive divisions. Even if they only represent 2/3 of the vote, losing them 60-40 would mean Vega needs to win the White Moderates by 70-30.

And those divisions are where Krasner did best. He won over 50% of the vote among the Wealthy Progressives, in a seven-person race. And he won nearly 40% in the Black Voter divisions.

View code
svd_time <- readRDS("../svd_time/svd_time_res_20201205.RDS")

K <- 3
mutate_add_score <- function(U_df, D, year, min_year=2002){
  year_dm <- year - min_year
  for(k in 1:K){
    var.k <- function(x) sprintf("%s.%i", x, k)
    U_df[[var.k("score")]] <- D[k] * (
      U_df[[var.k("alpha")]] + U_df[[var.k("beta")]] * year_dm
    )
  }
  return(U_df)
}

div_cats <- purrr::map(
  c(2002, 2017, 2020), 
  function(y) mutate_add_score(svd_time$U, svd_time$D, y, 2002)
) %>%
  bind_rows(.id = "id") %>%
  mutate(year = c(2002, 2017, 2020)[as.integer(id)])

km <- kmeans(
  div_cats[, c("score.1","score.2","score.3")], 
  centers=matrix(
    10 * c(
      1, -1, 0, 
      -1, 1, 0, 
      0, -1, -1, 
      -1, -1, 1
    ), 
    4, 3, 
    byrow=T
  )
)

cats <- c(
  "Black Voters",
  "Wealthy Progressives",
  "Hispanic Voters",
  "White Moderates"
)
div_cats$cluster <- factor(cats[km$cluster], levels=cats)
cat_colors <- with(colors_sixtysix(), c(light_blue, light_red, light_orange, light_green))
names(cat_colors) <- cats  

df_da <- df_major %>% 
  filter(
    year == 2017, election_type=="primary", party == "DEMOCRATIC", office == "DISTRICT ATTORNEY"
  ) %>%
  group_by(candidate, warddiv) %>%
  summarise(votes=sum(votes)) %>%
  group_by(warddiv) %>%
  mutate(
    total_votes=sum(votes),
    pvote = votes/sum(votes)
  )
  
ggplot(
  df_da %>% left_join(div_cats %>% filter(year == 2017)) %>%
    group_by(cluster, candidate) %>%
    summarise(
      pvote = weighted.mean(pvote, total_votes),
      total_votes=sum(total_votes)
    ) %>%
    filter(candidate != "Write In") %>%
    group_by(candidate) %>%
    mutate(pvote_overall = weighted.mean(pvote, total_votes)) %>%
    ungroup() %>%
    arrange(desc(pvote_overall)) %>%
    mutate(
      candidate=format_name(candidate),
      candidate = factor(candidate, levels=unique(candidate))
    )
) +
  geom_bar(aes(x=cluster, y=100*pvote, fill=cluster), stat="identity") +
  facet_wrap(~candidate) +
  scale_fill_manual(values=cat_colors) +
  theme_sixtysix() %+replace% 
  theme(
    axis.text.x = element_blank(), 
    plot.title = element_text(face="bold", size=14, hjust = 0)
  ) +
  labs(
    title="Krasner won big in Black Voter and Wealthy Progressive Divs",
    subtitle="2017 District Attorney Primary",
    x=NULL,
    y="Percent of Vote",
    fill=NULL
  )

Naively, if the election looked like 2017, I’d expect Krasner to win both of these blocs by at least 70-30 in a two-way race. That would put the election away. Vega wouldn’t be able to win just by coalescing the South Philly and Northeast White Moderates and Hispanic North Philly. Has the opinion of Krasner shifted in those super supportive divisions by that much in that last four years? This is where surveys would really help, but strikes me as unlikely.