Redistricting Council

With the release of 2020’s Census data, redistricting will kick into gear. While boundaries for both the US Congress and State Senate and House will be redrawn, I thought I’d start close to home: City Council.

Philadelphia’s City Council consists of 17 seats, 10 of which are districted and the rest At Large. For the drawing of those 10 districts, the primary restriction imposed by the Charter is that they must have relatively equal populations. This has been implemented as each district being within 5% of one tenth of the city’s total population. Council has six months from the release of the data to produce maps, which would put the deadline at March 12, though that might depend on resolution of the allocation of incarcerated people.

I’ll use my jaywalkr library to crosswalk the data.

Methodological Note: I haven’t adapted the crosswalking for the Census’ Differential Privacy. It shouldn’t make a huge difference when aggregated to council districts. For the later analyses, I’ll use Block Groups which are less affected than Blocks.

View code
library(sf)
library(dplyr)
library(ggplot2)
library(readr)
library(tidyr)
devtools::load_all("../../admin_scripts/sixtysix/")

bgs <- st_read("../../data/gis/census/tl_2020_42_bg/tl_2020_42_bg.shp", quiet=TRUE) %>%
  filter(COUNTYFP == "101")

council <- st_read("../../data/gis/city_council/Council_Districts_2016.shp", quiet=TRUE)
council <- st_transform(council, st_crs(bgs))

blocks <- st_read(
  "../../data/gis/census/tl_2020_42101_tabblock/tl_2020_42101_tabblock20.shp", 
  quiet=TRUE
)

block_pops <- read_csv(
  "../../data/census/decennial_2020_poprace_phila_blocks/DECENNIALPL2020.P4_data_with_overlays_2021-09-22T083940.csv",
  skip=1
)

block_pops <- block_pops %>% rename(total_pop = `!!Total:`) %>% select(id, total_pop) 
block_pops <- block_pops %>% mutate(GEOID20 = substr(id, 10, 25))

blocks <- blocks %>% left_join(block_pops, by="GEOID20")
block_centroids <- st_centroid(blocks) %>% filter(total_pop > 0)

bg_pops <- read_csv(
  "../../data/census/decennial_2020_poprace_phila_bg/DECENNIALPL2020.P2_data_with_overlays_2021-09-21T090431.csv", 
  skip=1
) %>%
  mutate(GEOID = substr(id, 10, 25))

bg_pops <- bg_pops %>%
  mutate(
    total_pop = `!!Total:`,
    hispanic = `!!Total:!!Hispanic or Latino`,
    black = `!!Total:!!Not Hispanic or Latino:!!Population of one race:!!Black or African American alone`,
    white = `!!Total:!!Not Hispanic or Latino:!!Population of one race:!!White alone`,
    asian = `!!Total:!!Not Hispanic or Latino:!!Population of one race:!!Asian alone`
  ) %>%
  select(GEOID, total_pop:asian)

# table(is.na(blocks$total_pop))

divs <- st_read("../../data/gis/warddivs/202011/Political_Divisions.shp", quiet=TRUE) %>%
  st_transform(st_crs(bgs)) %>%
  mutate(warddiv=pretty_div(DIVISION_N))

devtools::load_all("../../admin_scripts/libs/jaywalkr/")

div_bg_cw <- crosswalk_geoms(
  divs$geometry,
  bgs$geometry,
  block_centroids$geometry,
  block_centroids$total_pop,
  divs$warddiv,
  bgs$GEOID,
  allow_unmatched_weights = "distance",
  verbose=FALSE
)

df_major <- readRDS("../../data/processed_data/df_major_20210118.Rds")
df_major <- df_major %>% mutate(candidate=factor(candidate))
levels(df_major$candidate) <- format_name(levels(df_major$candidate))
df_major$candidate <- as.character(df_major$candidate)

bg_votes <- df_major %>%
  filter(year == 2019, election_type=="primary", office =="MAYOR") %>%
  group_by(warddiv) %>%
  summarise(votes=sum(votes)) %>%
  left_join(div_bg_cw, by=c("warddiv" = "geom.id.x")) %>%
  group_by(geom.id.y) %>%
  summarise(votes = sum(votes * from_x_to_y, na.rm=TRUE))

bgs <- bgs %>% left_join(bg_pops) %>% left_join(bg_votes, by=c("GEOID" = "geom.id.y"))

bg_council_cw <- crosswalk_geoms(
  bgs$geometry,
  council$geometry,
  weight_pts=block_centroids$geometry,
  weights=block_centroids$total_pop,
  x_id=bgs$GEOID,
  y_id=as.character(council$DISTRICT),
  allow_unmatched_weights = "distance",
  verbose=FALSE
)

div_council_cw <- crosswalk_geoms(
  divs$geometry,
  council$geometry,
  weight_pts=block_centroids$geometry,
  weights=block_centroids$total_pop,
  x_id=divs$warddiv,
  y_id=as.character(council$DISTRICT),
  allow_unmatched_weights = "distance",
  verbose=FALSE
)

council_pops <- as.data.frame(bgs) %>% 
  filter(total_pop > 0) %>%
  left_join(bg_council_cw, by=c("GEOID"="geom.id.x")) %>%
  group_by(geom.id.y) %>%
  summarise(
    across(
      total_pop:asian,
      function(x) sum(x * from_x_to_y, na.rm=TRUE),
      .names="{.col}_sum"
    ),
    across(
      total_pop:asian,
      function(x) sum(x * from_x_to_y^2 * votes, na.rm=TRUE),
      .names="{.col}_vote_weighted"
    ),
    total_votes = sum(votes)
  ) %>%
  mutate(
    across(
      hispanic_vote_weighted:asian_vote_weighted,
      function(x) x/total_pop_vote_weighted,
      .names="p_{.col}"
    ),
    across(
      hispanic_sum:asian_sum,
      function(x) x/total_pop_sum,
      .names="p_{.col}"
    )
  )

council <- council %>%
  left_join(council_pops, by=c("DISTRICT"="geom.id.y"))

council <- council %>%
  mutate(
    across(
      hispanic_sum:asian_sum,
      function(x) x / total_pop_sum,
      .names="p_{.col}"
    )
  )
TARGET_POP = sum(bg_pops$total_pop)/10

library(scales)

pct <- function(x, digits=0){
  paste0(round(100*x, digits=digits), "%")
}

color_text <- function(pop, target){
  color <- ifelse(
    pop > target,
    colors_sixtysix()$strong_blue,
    colors_sixtysix()$strong_red
  )
  sprintf(
    '<span style="color:%s">%s (%s) %s target</span>',
    color,
    comma(abs(pop - target)),
    pct(abs(pop - target)/target),
    ifelse(pop > target, "over", "under")
  )
}

council <- council %>%
  mutate(
    popup=glue::glue(
      "<b>District {DISTRICT}</b><br>
      2020 Pop: {comma(total_pop_sum)}<br>
      {color_text(total_pop_sum, TARGET_POP)}<br>
      % NH Black: {pct(p_black_sum)}<br>
      % NH White: {pct(p_white_sum)}<br>
      % Hispanic: {pct(p_hispanic_sum)} <br>
      % Asian: {pct(p_asian_sum)} <br>"
    )
  )

lf_pops <- make_leaflet(
  df=council, 
  fill_col="total_pop_sum", 
  popup_col = "popup",
  zoom=11,
  pal_type = "divergent",
  midpoint=TARGET_POP
) %>%
  addLabelOnlyMarkers(
    data=council$geometry %>% st_centroid() %>% st_coordinates() %>% as.data.frame(),
    lng=~X,
    lat=~Y,
    label=council$DISTRICT,
    labelOptions=labelOptions(permanent=TRUE, textOnly=TRUE, textsize="16px", style=list(fontWeight="bold"))
  ) %>%
  addPolygons(data=council, fill=FALSE, color="white", opacity = 1.0, weight = 2)

cat(render_iframe(lf_pops))

Four districts are too populous, with Districts 1 and 5 above the 5% margin. The Northwest’s 4 and 8 are below the target by more than 5%.

The existing districts are relatively well representative of Philadelphia’s racial composition. Four districts are predominantly Black, two predominantly White, and one predominantly Hispanic. Of the three without a racial majority, North Philly’s District 5 is 41% Black and 38% White, South Philly’s 2 is 41% White and 39% Black, and the River Wards’ 6 is 46% White, 20% Hispanic, and 18% Black.

View code
council_long <- council %>% 
  as.data.frame() %>%
  tidyr::pivot_longer(
    p_hispanic_sum:p_asian_sum, 
    names_to="race",
    names_pattern = "p_([a-z]+)_sum",
    values_to = "prop"
  ) %>%
  mutate(race=format_name(race))

overall_demo <- council_long %>%
  group_by(race) %>%
  summarise(prop = weighted.mean(prop, w=total_pop_sum))


binwidth <- 5
breaks <- seq(0, 100, binwidth)
council_long$bin <- as.numeric(cut(100*council_long$prop, breaks)) * binwidth - binwidth/2

save_and_render_image <- function(gg, file=NULL, hover="", ...){
  DIR <- "images"
  if(!dir.exists(DIR)) dir.create(DIR)
  
  if(is.null(file)){
    obj.name <- deparse(substitute(gg))
    file <- sprintf("%s.png", obj.name)
  }

  path <- paste0(DIR,"/", file)
  ggsave(filename=path, plot=gg,  ...)
  sprintf("![%s](%s)", hover, path)
}

source("../../admin_scripts/sixtysix/R/theme_sixtysix.R")
bar_demo_overall <- ggplot(council_long, aes(x = bin, y=1)) +
  geom_bar(stat = "identity", colour = "black", width = 5, fill="grey80") +
  geom_text(aes(label=DISTRICT),
   position=position_stack(vjust=0.5), colour="black") +
  facet_wrap(~race) +
  geom_vline(
    data=overall_demo,
    linetype="dashed",
    aes(xintercept=100*prop)
  ) +
  geom_vline(
    xintercept=50, color=grey(0.4)
  ) +
  theme_sixtysix() +
  scale_x_continuous(breaks=seq(0,100,10)) +
  labs(
    title="Council District Demographics",
    subtitle="Dashed lines are Philadelphia's overall demographics",
    x="Percent of Demographic",
    y="Number of Districts"
  )

cat(save_and_render_image(bar_demo_overall))

Weighting Block Groups’ demographics by votes (I’ll use the 2019 Mayoral Primary) doesn’t change the topline much, but does switch some orders: it makes District 2 46% White, 34% Black; District 5 43% White, 37% Black; and pushes District 6 to a majority 52% White.

View code
# council %>%
#   select(DISTRICT, p_white_sum, p_white_vote_weighted, p_black_sum, p_black_vote_weighted, p_hispanic_sum, p_hispanic_vote_weighted)
get_election_df <- function(filtered_df_major){
  get_popup <- function(office, year, election_type, candidate, votes, pvote){
    res <- glue::glue("{format_name(office[1])} {year[1]} {format_name(election_type[1])}")
    order <- order(votes, decreasing=TRUE)
    order <- order[order %in% which(votes>0)]
    lines <- glue::glue("{candidate[order]}: {comma(votes[order])} ({pct(pvote[order])})")
    res <- paste(c(res, lines), collapse="<br>")
  }
  
  winner_df <- filtered_df_major %>%
    group_by(warddiv, year, election_type, office) %>%
    mutate(
      rank = rank(desc(votes)),
      pvote=votes/sum(votes)
    ) %>%
    summarise(
      winner = candidate[rank==1],
      pvote_winner=pvote[rank==1],
      total_votes=sum(votes),
      popup=get_popup(office, year, election_type, candidate, votes, pvote),
      .groups="drop"
    )
  wide_df <- filtered_df_major %>%
    select(warddiv, year, election_type, office, candidate, votes) %>%
    group_by(warddiv) %>%
    mutate(pvote = votes/sum(votes)) %>%
    ungroup() %>%
    pivot_wider(names_from = candidate, values_from=c(votes, pvote))
  
  winner_df %>% left_join(wide_df)
}

pres_16 <- df_major %>%
  filter(
    year == 2016, 
    election_type=="primary", 
    party=="DEMOCRATIC",
    office=="PRESIDENT OF THE UNITED STATES"
  ) %>%
  get_election_df()

da_17 <- df_major %>%
  filter(
    year == 2017,
    election_type=="primary",
    party=="DEMOCRATIC",
    office=="DISTRICT ATTORNEY"
  ) %>%
  get_election_df()

get_winner_color <- function(election_df){
  candidates <- election_df %>% 
    filter(!is.na(winner)) %>%
    group_by(winner) %>% 
    count() %>% 
    arrange(desc(n)) %>%
    with(winner)
  colors <- with(
    colors_sixtysix(), 
    c(
      strong_blue, strong_orange, strong_purple, 
      strong_green, strong_red, light_yellow, 
      strong_grey, strong_grey
    )[1:length(candidates)]
  )
  names(colors) <- candidates
  colors
}

To understand the local political dynamics in the districts, consider two recent Democratic Primaries that split the city: the 2016 Presidential and the 2017 District Attorney Primaries.

View code
council_pres <- pres_16 %>% left_join(div_council_cw, by=c("warddiv"="geom.id.x")) %>%
  pivot_longer(starts_with("votes_"), names_pattern = "^votes_(.*)$", values_to="votes") %>%
  group_by(geom.id.y, name) %>%
  summarise(votes=sum(votes)) %>%
  filter(name %in% c("Bernie Sanders", "Hillary Clinton")) %>%
  group_by(geom.id.y) %>%
  mutate(pvote=votes/sum(votes)) %>%
  filter(name %in% c("Bernie Sanders"))

binwidth <- 5
breaks <- seq(0, 100, binwidth)
council_pres$bin <- as.numeric(cut(100*council_pres$pvote, breaks)) * binwidth - binwidth/2
  
pres_bar <- ggplot(council_pres, aes(x = bin, y=1)) +
  geom_bar(stat = "identity", colour = "black", width = 5, fill="grey80") +
  geom_text(aes(label=geom.id.y),
   position=position_stack(vjust=0.5), colour="black") +
  theme_sixtysix() +
  expand_limits(x=c(0,100)) +
  labs(
    title="Council District 2016 Primary Results",
    x="Two-way Percent for Bernie Sanders",
    y="Count of Districts"
  )
cat(save_and_render_image(pres_bar))

Sanders won no district overall in 2016, but came close in Districts 1, 6, and 10. Interestingly, District 3 combines his best neighborhoods in University City with some of Clinton’s best in farther West Philly.

View code
council_da <- da_17 %>% left_join(div_council_cw, by=c("warddiv"="geom.id.x")) %>%
  pivot_longer(starts_with("votes_"), names_pattern = "^votes_(.*)$", values_to="votes") %>%
  group_by(geom.id.y, name) %>%
  summarise(votes=sum(votes)) %>%
  group_by(geom.id.y) %>%
  mutate(pvote=votes/sum(votes)) %>%
  filter(name %in% c("Lawrence S Krasner"))

binwidth <- 5
breaks <- seq(0, 100, binwidth)
council_da$bin <- as.numeric(cut(100*council_da$pvote, breaks)) * binwidth - binwidth/2
  
bar_da <- ggplot(council_da, aes(x = bin, y=1)) +
  geom_bar(stat = "identity", colour = "black", width = binwidth, fill="grey80") +
  geom_text(aes(label=geom.id.y),
   position=position_stack(vjust=0.5), colour="black") +
  theme_sixtysix() +
  expand_limits(x=c(0,100)) +
  labs(
    title="Council District 2017 Primary Results",
    x="Overall Percent for Larry Krasner",
    y="Count of Districts"
  )


cat(save_and_render_image(bar_da))

D.A. in 2017 was a many-candidate race, so breaking 50% in any district was a feat. Krasner did just that in District 3, and came close in 1, 5, 8, and 9.

View code
init_district_leaflet <- function(district){
  bbox <- st_bbox(
    council %>% filter(as.character(DISTRICT) == !!district) %>%
      with(st_as_sfc(st_as_binary(geometry)))
  )
  leaflet() %>%
    setView(
      zoom=12,
      lng=mean(bbox[c(1,3)]),
      lat=mean(bbox[c(2,4)])
    ) %>%
    addProviderTiles(providers$Stamen.TonerLite) %>%
    setMaxBounds(
      lng1=bbox["xmin"],
      lng2=bbox["xmax"],
      lat1=bbox["ymin"],
      lat2=bbox["ymax"]
    )
}


map_election <- function(election_df, district, title){

  div_df <- divs %>% left_join(election_df)
  
  div_df <- div_df %>%
    mutate(
      popup = glue::glue(
        "<b>Division {warddiv}</b><br>{popup}"
      )
    )
  
  colors <- get_winner_color(election_df)
  winner_fill <- colors[as.character(div_df$winner)]

  # alpha <- div_df$pvote_winner
  # alpha <- alpha / max(alpha, na.rm=TRUE)
  # alpha[is.na(alpha)] <- mean(alpha, na.rm=TRUE)

  alpha <- div_df$total_votes / st_area(div_df$geometry)
  cutoff <- quantile(alpha[!is.na(alpha)], 0.9)
  alpha <- pmin(alpha, cutoff)
  alpha <- alpha / max(alpha, na.rm=TRUE)
  alpha <- 0.1 + 0.9 * as.numeric(alpha)
  alpha[is.na(alpha)] <- mean(alpha, na.rm=TRUE)

  winner_fill[is.na(winner_fill)] <- grey(0.5)
  RGB = colorspace::hex2RGB(winner_fill)@coords
  fill <- rgb(RGB[,1], RGB[,2], RGB[,3], alpha)
  
  init_district_leaflet(district) %>% 
    leaflet::addPolygons(
      data=div_df,
      weight=0,
      color="white",
      opacity=1,
      fillOpacity = 0.8,
      smoothFactor = 0,
      fillColor = fill,
      popup=div_df$popup
    ) %>%
    leaflet::addPolygons(
      data=council %>% filter(DISTRICT == !!district),
      weight = 4,
      fill=FALSE,
      opacity=1,
      color=grey(0.2),
      fillOpacity = 0
    ) %>%
    addControl(title, position="topright", layerId="map_title")  %>%
    addLegend(
      layerId="geom_legend",
      position="bottomright",
      colors=colors,
      labels=names(colors)
    )
}
election_bar <- function(election_df, district, title){
  colors <- get_winner_color(election_df)
  if(length(colors) > 4){
    text_angle <- 45
    text_hjust <- 1
    text_vjust <- 1
  } else {
    text_angle <- 0
    text_hjust <- 0.5
    text_vjust <- 0.5
  }
  ggplot(
    election_df %>%
      left_join(div_council_cw, by=c("warddiv"="geom.id.x")) %>%
      filter(geom.id.y == !!district) %>%
      pivot_longer(starts_with("votes_"), values_to="votes") %>%
      mutate(candidate=factor(gsub("^votes_","",name), levels=names(colors))) %>%
      filter(!is.na(candidate)) %>%
      group_by(candidate) %>%
      summarise(votes=sum(votes)),
    aes(x=candidate, y=votes)
  ) +
    geom_bar(stat="identity", aes(fill=candidate)) +
    labs(
      title=title,
      x=NULL,
      y="Votes"
    ) +
    scale_y_continuous(labels=scales::comma) +
    scale_fill_manual(values=colors, guide=FALSE)+
    theme_sixtysix() %+replace% 
    theme(axis.text.x = element_text(
      angle=text_angle, 
      hjust=text_hjust, 
      vjust=text_vjust
    ))
}
bg_df <- bgs %>% 
  left_join(bg_pops) %>%
  # filter(total_pop > 0) %>%
  mutate(
    across(
      hispanic:asian,
      function(x) x / total_pop,
      .names="p_{.col}"
    )
  ) %>%
  mutate(
    popup=glue::glue(
      "<b>Block Group {GEOID}</b><br>
      2020 Pop: {comma(total_pop)}<br>
      % NH Black: {pct(p_black)}<br>
      % NH White: {pct(p_white)}<br>
      % Hispanic: {pct(p_hispanic)} <br>
      % Asian: {pct(p_asian)} <br>
      "
    ),
    majority_demo = apply(cbind(p_hispanic, p_black, p_white, p_asian), 1, which.max)
  )
bg_df$majority_demo[sapply(bg_df$majority_demo, length) == 0] <- NA
bg_df$majority_demo <- unlist(bg_df$majority_demo)
bg_df$pct_of_majority_demo <- with(
  bg_df, 
  cbind(p_hispanic, p_black, p_white, p_asian)[cbind(1:nrow(bg_df), bg_df$majority_demo)]
)

bg_df$majority_demo <- c("Hispanic", "Black", "White", "Asian")[unlist(bg_df$majority_demo)]

demo_colors <- c(
    "Black"=colors_sixtysix()$strong_green,
    "White"=colors_sixtysix()$strong_blue,
    "Hispanic"=colors_sixtysix()$strong_orange,
    "Asian"=colors_sixtysix()$strong_red
  )

map_demographic <- function(district, title){
  
  demo_fill <- demo_colors[bg_df$majority_demo]
  alpha <- with(bg_df, total_pop/st_area(geometry))
  cutoff <- quantile(alpha, 0.99)
  alpha <- pmin(alpha, cutoff)
  alpha <- alpha / max(alpha, na.rm=TRUE)
  demo_fill[is.na(demo_fill)] <- grey(0.5)
  alpha[is.na(alpha)] <- mean(alpha, na.rm=TRUE)
  RGB = colorspace::hex2RGB(demo_fill)@coords
  fill <- rgb(RGB[,1], RGB[,2], RGB[,3], alpha)

  init_district_leaflet(district) %>% 
    leaflet::addPolygons(
      data=bg_df,
      weight=0,
      color="white",
      opacity=1,
      fillOpacity = 1,
      smoothFactor = 0,
      fillColor = fill,
      popup=bg_df$popup
    ) %>%
    leaflet::addPolygons(
      data=council %>% filter(DISTRICT == !!district),
      weight = 4,
      fill=FALSE,
      opacity=0.8,
      color=grey(0.2),
      fillOpacity = 0
    ) %>%
    addControl(title, position="topright", layerId="map_title")  %>%
    addLegend(
      layerId="geom_legend",
      position="bottomright",
      colors=demo_colors,
      labels=names(demo_colors)
    )
}

save_widget <- function(widget, file, dir="leaflet_files"){
  if(!dir.exists(dir)) dir.create(dir)
  old <- setwd(dir)  # saveWidget can't save to a folder
  on.exit(setwd(old))
  htmlwidgets::saveWidget(
    widget,
    file=file,
    selfcontained=TRUE
  )
}

bar_demographic <- function(district, title){
  ggplot(
    council_pops %>%
      filter(geom.id.y==!!district) %>%
      pivot_longer(hispanic_sum:asian_sum, values_to="pop") %>%
      mutate(
        race=factor(
          format_name(gsub("_sum$","",name)), 
          levels=names(demo_colors)
        ),
        pct_race=pop/total_pop_sum
      ) %>%
      filter(!is.na(race)),
    aes(x=race, y=100*pct_race)
  ) +
    geom_bar(stat="identity", aes(fill=race)) +
    labs(
      title=title,
      x=NULL,
      y="% of District"
    ) +
    scale_y_continuous(labels=scales::comma) +
    scale_fill_manual(values=demo_colors, guide=FALSE)+
    theme_sixtysix()
}
View code
RECREATE_MAPS <- FALSE

cat_ln <- function(...) cat(paste0(..., '\n\n'))
iframe <- function(DIR, file) {
  sprintf(
    '<iframe src="%s/%s" width="100%%" height="600" scrolling="no" frameborder="0"></iframe>',
    DIR, file
  )
}

for(DISTRICT in as.character(1:10)){
  cat_ln(sprintf("### District %s", DISTRICT))

  get_file <- function(pattern) sprintf(pattern, DISTRICT)
  get_title <- function(title) sprintf("%s, District %s", title, DISTRICT)

  pop <- council_pops %>% 
    filter(geom.id.y==!!DISTRICT) %>% with(total_pop_sum)
  
  cat_ln(
    glue::glue(
      "District {DISTRICT} has {scales::comma(pop)} people, ",  
      "{pct(abs(pop - TARGET_POP)/TARGET_POP, 1)} ",
      "{if(pop > TARGET_POP) 'over' else 'under'} the target of ",
      "{scales::comma(round(TARGET_POP))} and would need to ",
      "{if(pop > TARGET_POP) 'shrink' else 'grow'}."
    )
  )
  
  cat_ln(
    case_when(
      DISTRICT == "1" ~ 
        "District 7 to its North and 2 to its Southwest both need to grow. Bringing down the Northern bounadry would cut out the less liberal Hispanic voters along Frankford. Bringing up the Southern boundary would cut out some of the less liberal White voters in South Philly. Either way, this district likely becomes more progressive.",
       DISTRICT == "2" ~ 
        "District 2 could expand into District 1's South Philly, 5's Center City, or 3's Southwest. The first two would add a predominantly-White, leftist population, while the last would add a predominantly-Black, Clinton-supporting group.",
      DISTRICT == "3" ~
        "Bounded by the Schuylkill and two also-too-small districts, District 3 doesn't have a ton of natural space to expand. It will need to expand into either District 2 and 4's predominantly-Clinton sections, or cross the river.",
      DISTRICT == "4"~
        "Of all the districts, 4 needs to grow the most. It could easily come into North Philly's District 5, increasing it's already noteable diversity.",
      DISTRICT == "5"~
        "District 5 is among the highest population districts, and needs to shrink. It could cut out the dense, liberal sections of Center City or Fishtown it currently includes, or yield some of the predominantly-Clinton regions in its North.",
      DISTRICT == "6"~
        "District 6 would need to probably yield some of its River Wards region to 7.",
      DISTRICT == "7" ~
        "District 7 could grow in any direction except the North, into the predominantly-Black sections of North Philly's 5, Bernie-supporting Fisthown of 1, or the more conservative White sections of 6. Regardless, it will almost certainly stay Philadelphia's single predominantly-Hispanic District.",
      DISTRICT == "8" ~
        "District 8's only boundary with a district that needs to shrink is into North Philly's 5.",
      DISTRICT == "9" ~
        "District 9 is basically at the city's average population.",
    DISTRICT == "10" ~
      "Needing a little shrinkage, District 10 could yield some land to 9.",
      TRUE ~ ""
    )
  )
  
  bar_demo <- bar_demographic(
    district=DISTRICT, 
    title=get_title("2020 Census Population")
  )
  cat_ln(save_and_render_image(bar_demo, get_file("bar_demo_%s.png")))
  
  if(RECREATE_MAPS){
    lf_demo <- map_demographic(
      district=DISTRICT, 
      title="2020 Census Population, Opacity = Pop. Density"
    )
    
    render_iframe(lf_demo, get_file("lf_demo_%s.html"))
  }
  cat_ln(iframe("leaflet_files", get_file("lf_demo_%s.html")))
  
  bar_pres <- pres_16 %>% election_bar(
    district=DISTRICT, 
    title=get_title("2016 Presidential Primary")
  )
  cat_ln(save_and_render_image(bar_pres, get_file("bar_pres_%s.png")))

  if(RECREATE_MAPS){
    lf_pres <- pres_16 %>% map_election(
      district=DISTRICT, 
      title="2016 Presidential Primary, Opacity = Vote Density"
    )
    render_iframe(lf_pres, get_file("lf_pres_%s.html"))
  }
  cat_ln(iframe("leaflet_files", get_file("lf_pres_%s.html")))
  

  bar_da <- da_17 %>% election_bar(
    district=DISTRICT, 
    title=get_title("2017 District Attorney Primary")
  )
  cat_ln(save_and_render_image(bar_da, get_file("bar_da_%s.png")))

  if(RECREATE_MAPS){
    lf_da <- da_17 %>% map_election(
      district=DISTRICT, 
      title="2017 District Attorney Primary, Opacity = Vote Density"
    )
    render_iframe(lf_da, get_file("lf_da_%s.html"))
  }
  cat_ln(iframe("leaflet_files", get_file("lf_da_%s.html")))
}

Common Pleas Deep Dive, 2021

Belatedly, I’ve had time to sit down with the 2021 Primary results. Here are some observations.

In November, Philadelphia will elect eight new judges on the Court of Common Pleas. After the May Primary, we know almost certainly who those judges will be; the Democratic nominees will all win.

All eight Democratic nominees are Recommended by the Bar, three Highly. Surprisingly, they don’t include the person in the number one ballot position. And they won with a wide diversity of maps.

View code
library(dplyr)
library(tidyr)
library(ggplot2)
devtools::load_all("../../admin_scripts/sixtysix/")

ballot <- read.csv("../../data/common_pleas/judicial_ballot_position.csv")
res <- readxl::read_xlsx("C:/Users/Jonathan Tannen/Downloads/2021_primary (1).xlsx")
res <- res %>%
  pivot_longer(
    cols=`JUSTICE OF THE\r\nSUPREME COURT DEM\r\nMARIA MCLAUGHLIN`:`QUESTION #5\r\nNO`,
    names_to="candidate",
    values_to="votes"
  )
names(res) <- gsub("(\\r|\\n)+", " ", names(res))
names(res) <- gsub("\\s", "_", tolower(names(res)))

res$vote_type <- case_when(
  res$vote_type == "E" ~ "Election Day",
  res$vote_type == "M" ~ "Mail",
  res$vote_type == "P" ~ "Provisional"
)

res_cp <- res %>%
  filter(
    grepl("^JUDGE OF THE\r\nCOURT OF COMMON PLEAS DEM\r\n", candidate)
  ) %>%
  mutate(
    candidate = gsub("^JUDGE OF THE\r\nCOURT OF COMMON PLEAS DEM\r\n","", candidate)
  )

res_cp <- res_cp %>%
  left_join(ballot %>% filter(year == 2021) %>% mutate(candidate = toupper(name))) %>%
  mutate(name=format_name(name))

assertthat::assert_that(
  res_cp %>% filter(is.na(name)) %>% with(all(candidate == "Write-in"))
)

res_type <- res_cp %>%
  filter(!is.na(name)) %>%
  group_by(name, vote_type, rownumber, colnumber, philacommrec, dcc, inq) %>%
  summarise(votes=sum(votes)) %>%
  group_by(vote_type) %>%
  mutate(pvote = votes/sum(votes))
View code
res_total <- res_type %>% 
  group_by(name, rownumber, colnumber, philacommrec, dcc, inq) %>%
  summarise(votes=sum(votes), .groups="drop") %>%
  mutate(pvote = votes/sum(votes))

ggplot(
  res_total %>% arrange(votes) %>% mutate(winner = rank(-votes) <= 8),
  aes(y=rownumber, x=colnumber)
) +
  geom_tile(
    aes(fill=pvote*100, color=winner),
    size=2
  ) +
  geom_text(
    aes(
      label = ifelse(philacommrec==1, "R", ifelse(philacommrec==2,"HR","")),
      x=colnumber+0.45,
      y=rownumber+0.45
    ),
    color="grey70",
    hjust=1, vjust=0
  ) +
  geom_text(
    aes(
      label = ifelse(dcc==1, "D", ""),
      x=colnumber-0.45,
      y=rownumber+0.45
    ),
    color="grey70",
    hjust=0, vjust=0
  ) +
  geom_text(
    aes(label = sprintf("%s\n%0.1f%%", name, 100*pvote)),
    color="black"
    # fontface="bold"
  ) +
  scale_y_reverse(NULL) +
  scale_x_continuous(NULL)+
  scale_fill_viridis_c(guide=FALSE) +
  scale_color_manual(values=c(`FALSE`=NA, `TRUE`="yellow"), guide=FALSE) +
  annotate(
    "text",
    label="R = Recommended\nHR = Highly Recommended\nD = DCC Endorsed",
    x = 1.6,
    y = 6,
    hjust=0,
    vjust=0.5,
    color="grey70"
  ) +
  theme_sixtysix() %+replace% 
  theme(
    panel.grid.major=element_blank(),
    axis.text=element_blank()
  ) +
  ggtitle(
    "Common Pleas Results",
    "2021 Democratic Primary, arranged as on the ballot. Winners are outlined."
  )

Four candidates won in the first column, three in the second, and one in the third. Three winners were Highly Recommended by the Bar, including Michele Hangley in the second column and Chris Hall in the third, but by itself that rating wasn’t sufficient: John Padova and Mark Moore failed to capitalize on it. There’s some additional work needed to use it to your advantage.

The candidates’ maps are diverse. Nick Kamau and Cateria McCabe won everywhere, though slightly stronger in the Black wards of West and North Philly (and decidedly not the Northeast). Wendi Barish also won everywhere, slightly stronger in Center City and its ring. Betsy Wahl, Chris Hall, and Michele Hangley all won thanks to their strength in the Wealthy Progressive ring around Center City and in Chestnut Hill and Mount Airy. Craig Levin did the opposite, winning the Northeast and West and North Philly, presumably on the strength of his DCC endorsement. And Dan Sulman was the eighth and final winner, with the bright yellow 53rd ward just enough to push him through, where his sister is the Ward Leader.

View code
library(sf)

divs <- st_read("../../data/gis/warddivs/202011/Political_Divisions.shp") %>%
  mutate(warddiv = pretty_div(DIVISION_N))wards <- st_read("../../data/gis/warddivs/201911/Political_Wards.shp") %>%
  mutate(ward=sprintf("%02d", asnum(WARD_NUM)))res_ward_type <- res_cp %>%
  mutate(ward = substr(division, 1, 2)) %>%
  group_by(ward, name, vote_type) %>%
  summarise(votes=sum(votes)) %>%
  group_by(vote_type) %>%
  mutate(pvote=votes/sum(votes))

res_ward <- res_ward_type %>%
  group_by(ward, name) %>%
  summarise(votes=sum(votes)) %>%
  group_by(ward) %>%
  mutate(pvote=votes/sum(votes))

res_ward <- left_join(wards, res_ward)

candidate_order <- res_total %>% arrange(desc(votes)) %>% with(name)

ggplot(
  res_ward %>% 
    filter(!is.na(name)) %>%
    mutate(name=factor(name, levels=candidate_order))
) +
  geom_sf(aes(fill=100*pvote), color=NA) +
  scale_fill_viridis_c("Vote %") +
  facet_wrap(~ name) +
  theme_map_sixtysix() %+replace% theme(legend.position="right") +
  ggtitle("Common Pleas Results", "2021 Democratic Primary")

Caroline Turner was the first runner up, and the first candidate to fail to win from the top ballot position since at least 2007 (which is all the ballot layouts I can find). But she did really well in the 1st and 2nd Wards, which now deserve a name.

The Reclaim Wards

When clicking through the results online, I saw a cut that made me laugh out loud.

Results from Division 01-01

The top eight winners in 01-01 each received more than 9.99% of the vote. Ninth place? Only 2.5%. This is the kind of electoral coordination party bosses dream of.

In fact, that consolidation is true of the entire first ward (covering East Passyunk in South Philly).

View code
ward_bar <- function(ward, endorsements){
  df <- res_ward %>%
  as.data.frame() %>%
  filter(ward==!!ward, !is.na(name)) %>%
  arrange(desc(votes)) %>%
  # filter(1:n() <= 10) %>%
  mutate(name=factor(name, levels=name)) %>%
  mutate(
    last_name = gsub(".* ([A-Za-z]+)$", "\\1", name),
    endorsed= last_name %in% endorsements,
    reclaim = last_name %in% c("Hall", "Hangley", "Kamau", "Barish","Sulman", "McCabe", "Turner", "Wahl")
  )
  
  ggplot(df, aes(x=name, y=votes)) +
  geom_bar(stat="identity", aes(color=endorsed, fill=reclaim), size=1.2) +
      geom_vline(xintercept=8.5, linetype="dashed") +
  scale_color_manual(
    NULL,
    values=c(`TRUE` = "goldenrod", `FALSE`=NA),
    labels=sprintf(c("Not %s Endorsed", "%s Endorsed"), ward)
  ) +
  scale_fill_manual(
    NULL,
    values=c(`TRUE`="grey30", `FALSE`="grey60"),
    labels=c("Not Reclaim", "Reclaim")
  ) +
  theme_sixtysix() %+replace% 
  theme(axis.text.x = element_text(angle=45, hjust=1, vjust=1))+
  labs(
    title=sprintf("Common Pleas Results in Ward %s", ward),
    subtitle="2021 Democratic Primary",
    x=NULL,
    y="Votes"
  ) 

}

ward_bar(
  "01", 
  c("Hall", "Hangley", "Kamau", "Barish","Sulman", "McCabe", "Turner", "Wahl")
)  

It’s usually impossible to separate the many overlapping endorsements. Was it the Bar that brought the win, the DCC, or the Ward? But these eight winners are exactly the ward’s endorsed candidates. They were also the full Reclaim slate, so it’s impossible to separate the Ward’s power from Reclaim. But the deciding factors were almost certainly these two.

The four wards with the biggest gap between eighth and ninth place–suggesting the strongest slate power–were South Philly’s 1st and 2nd and West Philly’s 27th and 46th.

The 2nd Ward, just to the 1st’s North, had slightly different endorsements than Reclaim. The six candidates who had both a Reclaim and 2nd Ward endorsement did best. Barish came in seventh with only a Reclaim endorsement, Levin in eighth with only the 2nd, and Sulman in ninth with only Reclaim.

View code
ward_bar(
  "02", 
  c("Hall", "Hangley", "Kamau", "Levin", "McCabe", "Turner", "Wahl")
)  

In the 27th (where, full disclosure, I’m a committeeperson), the Reclaim endorsements appear to have carried the day: Turner and Hall won, while 27-endorsed Moore and Levin didn’t.

View code
ward_bar(
  "27",
      c("Barish", "Moore", "Hangley", "Kamau", "Levin", "McCabe", "Sulman", "Wahl")
)

I haven’t found the 46th ward endorsements, but the Reclaim slate cleaned up.

View code
ward_bar(
  "46",
      c()
) + labs(subtitle="2021 Primary. Endorsements not available.")

Not only was the Reclaim slate particularly strong in these wards, but the gap between eighth and ninth position make it clear it was the Reclaim endorsement itself, and not one of the other pregressive slates that drove voters.

But the Reclaim endorsement wasn’t itself enough to win across the city. Caroline Turner came in ninth despite it and first ballot position, mostly due to poor results in the Black wards.

Wealthy Progressive divisions did consolidate their votes in a way other divisions didn’t. Grouping divisions by my Voting Blocs shows that the Wealthy Progressive divisions’ preferred candidates did better there than the preferred candidates of other blocs’.

View code
devtools::load_all("C:/Users/Jonathan Tannen/Dropbox/sixty_six/posts/svdcov/")
svd_time <- readRDS("../../data/processed_data/svd_time_20210813.RDS")
div_cats <- get_row_cats(svd_time, 2020)

# ggplot(divs %>% left_join(div_cats, by=c("warddiv"="row_id"))) +
#   geom_sf(aes(fill=cat), color=NA)

res_cat <- res_cp %>% left_join(div_cats, by=c("division"="row_id")) %>%
  filter(!is.na(name)) %>%
  group_by(name, cat) %>%
  summarise(votes=sum(votes)) %>%
  group_by(cat) %>%
  mutate(
    rank = rank(desc(votes)),
    pvote=votes/sum(votes)
  )

ggplot(res_cat, aes(x=rank, y=100*pvote)) +
  geom_bar(stat="identity", fill="grey50") +
  facet_grid(cat ~ .) +
  geom_text(
    aes(label=gsub(".* ([A-Za-z]+)$", "\\1", name)),
    y=0.3,
    angle=90,
    hjust=0
  )+
  theme_sixtysix() +
  geom_vline(xintercept=8.5, linetype="dashed") +
  labs(x="Rank", y="% of Vote", title="Results by Division Bloc")

The top eight candidates in the Wealthy Progressive divisions received on average 9.1% of the vote, compared to 8.2% in the Black Voter divisions, 7.9% in the Hispanic Voter, and 7.5% in the White Moderate. To measure another way, Common Pleas candidates had a gini coefficient of votes of 0.29 in Wealthy Progressive divisions, compared to 0.22, 0.19, and 0.14 in the other three blocs, indicating greater inequality in the votes candidates received, and thus more separation.

This is probably due to voters there being more likely to look up recommendations either on or before election day, and having consolidated preferences.

View code
gini <- function(x){
  outer_sum <- outer(x, x, FUN="-")
  gini <- sum(abs(outer_sum)) / (2 * length(x)^2 * mean(x))
  return(gini)
}

# res_cat %>%
#   group_by(cat) %>%
#   summarise(
#     gini = gini(pvote),
#     mean=mean(pvote[rank <= 8])
#   )

Mail-In Votes

Entering this election, I was especially interested if mail-in ballots would have different dynamics than in-person voting. When people vote at the kitchen table, likely over days, will ballot position matter less? Will endorsements matter more?

In total, 33% of the votes for CP came by Mail, vs 66% on Election Day (and 1% Provisionals). The Wealthy Progressive divisions were more likely to use mail: 38% of their votes were by mail, compared to 34% in the White Moderate divisions, and 29 and 28% in the Black and Hispanic Voter divisions.

View code
# res_cp %>% group_by(vote_type) %>%
#   summarise(votes=sum(votes)) %>%
#   mutate(pct=votes/sum(votes))
# 
# res_cp %>% 
#   left_join(div_cats, by=c("division"="row_id")) %>%
#   group_by(cat, vote_type) %>%
#   summarise(votes=sum(votes)) %>%
#   group_by(cat) %>%
#   mutate(pct=votes/sum(votes))

The candidates who did better by mail were all in the bottom right of the ballot. The top three were also Highly Recommended, suggesting that endorsements were more likely to be looked up by people voting by mail.

View code
res_votetype <- res_cp %>%
  filter(!is.na(name)) %>%
  group_by(vote_type, name, rownumber, colnumber, philacommrec, dcc) %>%
  summarise(votes=sum(votes)) %>%
  group_by(vote_type) %>%
  mutate(pvote=votes/sum(votes)) %>%
  group_by(name) %>%
  mutate(votes_total=sum(votes)) %>%
  ungroup() %>%
  pivot_wider(names_from=vote_type, values_from=c(votes, pvote)) %>%
  mutate(pvote_total=votes_total / sum(votes_total)) %>%
  arrange(desc(pvote_Mail - `pvote_Election Day`))

ggplot(
  res_votetype %>% arrange(votes_total) %>% mutate(winner = rank(-votes_total) <= 8) %>%
    mutate(diff=pvote_Mail - `pvote_Election Day`),
  aes(y=rownumber, x=colnumber)
) +
  geom_tile(
    aes(fill=100*diff),
    size=2
  ) +
  geom_text(
    aes(
      label = ifelse(philacommrec==1, "R", ifelse(philacommrec==2,"HR","")),
      x=colnumber+0.45,
      y=rownumber+0.45
    ),
    color="grey70",
    hjust=1, vjust=0
  ) +
  geom_text(
    aes(
      label = ifelse(dcc==1, "D", ""),
      x=colnumber-0.45,
      y=rownumber+0.45
    ),
    color="grey70",
    hjust=0, vjust=0
  ) +
  geom_text(
    aes(
      label = sprintf("%s\n%s%0.1f%%, (%0.1f%% - %0.1f%%)", name, ifelse(diff>0, "+", "-"),100*abs(diff), 100*pvote_Mail,100* `pvote_Election Day`)
    ),
    color="black"
    # fontface="bold"
  ) +
  scale_y_reverse(NULL) +
  scale_x_continuous(NULL)+
  scale_fill_viridis_c(guide=FALSE) +
  annotate(
    "text",
    label="R = Recommended\nHR = Highly Recommended\nD = DCC Endorsed",
    x = 1.6,
    y = 6,
    hjust=0,
    vjust=0.5,
    color="grey70"
  ) +
  theme_sixtysix() %+replace% 
  theme(
    panel.grid.major=element_blank(),
    axis.text=element_blank()
  ) +
  ggtitle(
    "Common Pleas: Mail minus In Person.",
    "2021 Democratic Primary, arranged as on the ballot."
  )

Some of these differences are due to differential rates of mail-in usage: Moore, Hangley, and Wahl all did better in Wealthy Progressive wards, and they mailed in more often. We can adjust that by taking the within-Division difference for each candidate, and then taking a weighted average across divisions weighted by total votes. This decomposition leads to basically the same finding but slightly smaller effects: Moore did 1.9 percentage points better by mail within a given division, Hangley 1.2, and Hall 0.9.

Some of this may still be because the mail voters were systematically different from in-person voters, but this within-division comparison is the closest we can get with the available data.

Doing the decomposition by Voting Bloc is interesting. Moore does better everywhere by mail than in person, probably reflecting the important DCC and Bar effects but a lack of endorsements with Election Day presence. In every bloc, the highest differences are the poor-ballot-position, Bar-recommended candidates. Interestingly, the largest differences are in the White Moderate divisions (South Philly and the Northeast), probably reflecting the politicized nature of mail-in voting, and that in those divisions there were political differences between those who voted by mail and in person.

View code
res_votetype_weighted <- res_cp %>%
  select(division, name, vote_type, votes) %>%
  filter(!is.na(name)) %>%
  group_by(division, vote_type) %>%
  mutate(pvote=votes/sum(votes)) %>%
  group_by(division) %>%
  mutate(total_votes=sum(votes)) %>%
  select(-votes) %>%
  pivot_wider(names_from=vote_type, values_from=pvote) %>%
  mutate(diff = `Mail` - `Election Day`) %>%
  group_by(name) %>%
  summarise(
    diff=weighted.mean(diff, w=total_votes, na.rm = T)
  )

res_votetype_weighted_cat <- res_cp %>%
  select(division, name, vote_type, votes) %>%
  filter(!is.na(name)) %>%
  group_by(division, vote_type) %>%
  mutate(pvote=votes/sum(votes)) %>%
  group_by(division) %>%
  mutate(total_votes=sum(votes)) %>%
  select(-votes) %>%
  left_join(div_cats, by=c("division"="row_id")) %>%
  pivot_wider(names_from=vote_type, values_from=pvote) %>%
  mutate(diff = `Mail` - `Election Day`) %>%
  group_by(name, cat) %>%
  summarise(
    diff=weighted.mean(diff, w=total_votes, na.rm = T)
  )

ggplot(
  res_votetype_weighted_cat %>% group_by(cat) %>%
    mutate(rank=rank(desc(diff))),
  aes(x=rank, y=100*diff)
) +
  geom_bar(stat="identity") +
  facet_grid(cat~.) +
  theme_sixtysix() +
  geom_text(aes(label= gsub(".* ([A-Za-z]+)$", "\\1", name)), angle=90, hjust=0, y=0.1)+
  labs(
    title="Mail minus In-Person Results", subtitle="By Voting Bloc",
    y="Mail % minus In-Person %",
    x=NULL
  )

Next: The Effect of the Bar Recommendations!

Coming soon.