Creating Maps in R

This script outlines the steps that one would need to take to make their first map in R.

1. Load the libraries required

#install.packages("tidyverse")
library(tidyverse) # A family of packages used to clean, process, model, and visualize data

# install.packages("sf")
library(sf) # Offers Support for simple features, a standardized way to encode spatial vector data. Binds to 'GDAL' for reading and writing data, to 'GEOS' for geometrical operations, and to 'PROJ' for projection conversions and datum transformations.

# install.packages("devtools")
# devtools::install_github("Shelmith-Kariuki/rKenyaCensus")
library(rKenyaCensus) # Contains the 2019 Kenya Census data

# install.packages("ggplot2") 
library(ggplot2) # Used for creating amazing pretty graphs in R

# install.packages("tmap") #Thematic maps are geographical maps in which spatial data distributions are visualized
library(tmap)

# install.packages("leaflet") # Used for creating interactive maps
library(leaflet)

2. Read in the Kenya boundaries shapefiles

### Method 1
# KenyaSHP <- st_read("../Documents/PersonalDevelopment/GIS/Kenya counties/Kenya_counties.shp", quiet = TRUE, stringsAsFactors = FALSE, as_tibble = TRUE)

### Method 2
KenyaSHP <- read_sf("Kenya counties/Kenya_counties.shp", quiet = TRUE, stringsAsFactors = FALSE,as_tibble = TRUE)


### To easily view the shapefile in RStudio View pane, you can drop the geometry column and view the rest of the data.

### View(KenyaSHP %>% st_drop_geometry())
2.1 Inspect a few rows
print(KenyaSHP[6:9], n = 3)
#> Simple feature collection with 47 features and 3 fields
#> geometry type:  MULTIPOLYGON
#> dimension:      XY
#> bbox:           xmin: 33.91182 ymin: -4.702271 xmax: 41.90626 ymax: 5.430648
#> geographic CRS: WGS 84
#> # A tibble: 47 x 4
#>   COUNTY  Shape_Leng Shape_Area                                         geometry
#>   <chr>        <dbl>      <dbl>                               <MULTIPOLYGON [°]>
#> 1 Turkana      15.0        5.68 (((35.79593 5.344486, 35.79659 5.344676, 35.797…
#> 2 Marsab…      12.0        6.18 (((36.05061 4.456218, 36.23184 4.451243, 36.245…
#> 3 Mandera       7.36       2.12 (((41.62133 3.976735, 41.62272 3.978599, 41.623…
#> # … with 44 more rows
2.2 Inspect the column names
colnames(KenyaSHP)
#> [1] "OBJECTID"   "AREA"       "PERIMETER"  "COUNTY3_"   "COUNTY3_ID"
#> [6] "COUNTY"     "Shape_Leng" "Shape_Area" "geometry"
#names(KenyaSHP)
2.3 Inspect the class of the shapefile

class(KenyaSHP)
#> [1] "sf"         "tbl_df"     "tbl"        "data.frame"
2.4 Look at the variable data types

glimpse(KenyaSHP)
#> Rows: 47
#> Columns: 9
#> $ OBJECTID   <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
#> $ AREA       <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060, 0.877, 0.…
#> $ PERIMETER  <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.181, 5.964,…
#> $ COUNTY3_   <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
#> $ COUNTY3_ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
#> $ COUNTY     <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West Pokot", "…
#> $ Shape_Leng <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.030271, 8.3110…
#> $ Shape_Area <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, 0.74048058…
#> $ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MULTIPOLYGON …
2.5 View the geometry column
KenyaSHP_geometry <- st_geometry(KenyaSHP)

### View one geometry entry
KenyaSHP_geometry[[1]]
2.6 Geometry columns have their own class
class(KenyaSHP_geometry) #sfc, the list-column with the geometries for each feature
#> [1] "sfc_MULTIPOLYGON" "sfc"

class(KenyaSHP_geometry[[1]]) #sfg, the feature geometry of an individual simple feature
#> [1] "XY"           "MULTIPOLYGON" "sfg"

3. Change the projection of the shapefiles

### This line is not necessary since the shapefile is already in the WGS 84 projection.

KenyaSHP <- st_transform(KenyaSHP, crs = 4326)

### Inspect the co-ordinate reference system
st_crs(KenyaSHP)
#> Coordinate Reference System:
#>   User input: WGS 84 
#>   wkt:
#> GEOGCRS["WGS 84",
#>     DATUM["World Geodetic System 1984",
#>         ELLIPSOID["WGS 84",6378137,298.257223563,
#>             LENGTHUNIT["metre",1]]],
#>     PRIMEM["Greenwich",0,
#>         ANGLEUNIT["degree",0.0174532925199433]],
#>     CS[ellipsoidal,2],
#>         AXIS["latitude",north,
#>             ORDER[1],
#>             ANGLEUNIT["degree",0.0174532925199433]],
#>         AXIS["longitude",east,
#>             ORDER[2],
#>             ANGLEUNIT["degree",0.0174532925199433]],
#>     ID["EPSG",4326]]

4. Load the data that we are going to map

We will use V4_T2.27: Distribution of Persons with Disability by Type of Disability, Sex, Area of Residence, County and Sub County


Disability_df <- V4_T2.27

5. Clean the data, so that the counties match those in the shapefile.


### Inspect the county names of the disability data
counties_Disability_df <- unique(Disability_df$County)

### Inspect the county names of the shape file
counties_KenyaSHP <- KenyaSHP %>% 
  st_drop_geometry() %>% 
  select(COUNTY) %>% 
  pull() %>%
  unique()

### Convert the disability county names to title case
Disability_df <- Disability_df %>% 
  ungroup() %>% 
  mutate(County = tools::toTitleCase(tolower(County)))

### Inspect the county names of the disability data again 
counties_Disability_df <- unique(Disability_df$County)

### Inspect the county names that are different in each of the datasets
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
#> [1] "Xxx"             "Taita/Taveta"    "Tharaka-Nithi"   "Elgeyo/Marakwet"
#> [5] "Nairobi City"

### Clean the county names so that they match in both datasets
Disability_df <- Disability_df %>% 
  mutate(County = ifelse(County == "Taita/Taveta", "Taita Taveta",
                  ifelse(County == "Tharaka-Nithi", "Tharaka",
                  ifelse(County == "Elgeyo/Marakwet", "Keiyo-Marakwet",
                  ifelse(County == "Nairobi City", "Nairobi", County)))))


### Inspect the county names again to ensure that they now match.
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
#> [1] "Xxx"

6. Carry out some transformations on the data

Disability_df2 <- Disability_df %>% 
  filter(AdminArea == "County") %>% 
  select(-AdminArea, -SubCounty)

### unique(Disability_df2$County)[which(!unique(Disability_df2$County) %in% counties_KenyaSHP)]

### counties_KenyaSHP[which(!counties_KenyaSHP  %in% unique(Disability_df2$County))]

7. Join the shapefile and the data

### Rename the COUNTY variable, to match the variable name in the shapefile data
Disability_df2 <- Disability_df2 %>% 
  rename(COUNTY = County)

### Ensure that there are no leading or trailing spaces in the county variable
KenyaSHP$COUNTY <- trimws(KenyaSHP$COUNTY)
Disability_df2$COUNTY <- trimws(Disability_df2$COUNTY)

### Merge the data
merged_df <- left_join(KenyaSHP, Disability_df2, by = "COUNTY")

### Sort the data so that the County variable appears first
merged_df <- merged_df %>% 
  select(COUNTY, everything())

8. Inspect the merged data

### View the data
#  View(merged_df)
#  View(merged_df %>% st_drop_geometry())

### Class of the merged data
class(merged_df)
#> [1] "sf"         "tbl_df"     "tbl"        "data.frame"

### Column names
colnames(merged_df)
#>  [1] "COUNTY"               "OBJECTID"             "AREA"                
#>  [4] "PERIMETER"            "COUNTY3_"             "COUNTY3_ID"          
#>  [7] "Shape_Leng"           "Shape_Area"           "geometry"            
#> [10] "Vision_Total"         "Vision_Male"          "Vision_Female"       
#> [13] "Hearing_Total"        "Hearing_Male"         "Hearing_Female"      
#> [16] "Mobility_Total"       "Mobility_Male"        "Mobility_Female"     
#> [19] "Cognition_Total"      "Cognition_Male"       "Cognition_Female"    
#> [22] "SelfCare_Total"       "SelfCare_Male"        "SelfCare_Female"     
#> [25] "Communication_Total"  "Communication_Male"   "Communication_Female"

### Variable types
glimpse(merged_df)
#> Rows: 47
#> Columns: 27
#> $ COUNTY               <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West…
#> $ OBJECTID             <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
#> $ AREA                 <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060,…
#> $ PERIMETER            <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.1…
#> $ COUNTY3_             <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
#> $ COUNTY3_ID           <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
#> $ Shape_Leng           <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.0302…
#> $ Shape_Area           <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, …
#> $ geometry             <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MUL…
#> $ Vision_Total         <dbl> 2907, 1137, 1736, 830, 1909, 1302, 1040, 2757, 1…
#> $ Vision_Male          <dbl> 1308, 516, 894, 447, 862, 511, 402, 1214, 663, 2…
#> $ Vision_Female        <dbl> 1599, 621, 842, 383, 1047, 791, 638, 1543, 796, …
#> $ Hearing_Total        <dbl> 1817, 725, 1696, 1005, 1527, 908, 566, 1690, 934…
#> $ Hearing_Male         <dbl> 903, 336, 855, 543, 733, 416, 265, 794, 469, 127…
#> $ Hearing_Female       <dbl> 914, 389, 841, 462, 794, 492, 301, 896, 465, 145…
#> $ Mobility_Total       <dbl> 3184, 1216, 2526, 1629, 2271, 1096, 980, 3805, 2…
#> $ Mobility_Male        <dbl> 1628, 582, 1282, 877, 1051, 521, 446, 1732, 974,…
#> $ Mobility_Female      <dbl> 1556, 634, 1243, 752, 1220, 575, 534, 2073, 1076…
#> $ Cognition_Total      <dbl> 2071, 621, 2187, 1270, 1269, 612, 509, 1854, 973…
#> $ Cognition_Male       <dbl> 1093, 303, 1117, 664, 593, 301, 248, 851, 445, 1…
#> $ Cognition_Female     <dbl> 978, 318, 1069, 606, 676, 311, 261, 1003, 528, 2…
#> $ SelfCare_Total       <dbl> 2230, 619, 2404, 1392, 1249, 680, 498, 1799, 980…
#> $ SelfCare_Male        <dbl> 1163, 299, 1219, 751, 587, 318, 234, 863, 486, 1…
#> $ SelfCare_Female      <dbl> 1067, 320, 1184, 641, 662, 362, 264, 936, 494, 1…
#> $ Communication_Total  <dbl> 1429, 451, 1486, 760, 1030, 448, 358, 1217, 655,…
#> $ Communication_Male   <dbl> 777, 230, 770, 422, 524, 234, 200, 639, 372, 139…
#> $ Communication_Female <dbl> 652, 221, 715, 338, 506, 214, 158, 577, 283, 112…

9. Visualise the data

9.1 plot()

We are going to plot a base plot / map.

plot(KenyaSHP$geometry, lty = 3, col = "darkgreen")

9.2 ggplot2()
map1 <- ggplot(data = merged_df)+
          geom_sf(aes(geometry = geometry, fill = Vision_Total))+
            theme_void()+
            labs(title = "Distribution of Population with Vision Disability",
                 caption = "By: @Shel_Kariuki")+
            theme(plot.title = element_text(family = "URW Palladio L, Italic",size = 16, hjust = 0.5),
                  legend.title = element_blank(),
                  plot.caption = element_text(family = "URW Palladio L, Italic",size = 12))+
  #scale_fill_gradient(low = "darkgreen", high = "red")
  scale_fill_viridis_c()
           
map1

9.3 tmap()
tmap_mode("plot") #Set tmap mode to static plotting or interactive viewing

map2 <- tm_shape(merged_df) +
  tm_fill("Vision_Total",palette="Greens",
          title="Distribution of Population with Vision Disability", 
          id = "COUNTY") +
  tm_borders(col = "red",lty = 3)+
  tm_layout(legend.position = c("left", "bottom"))
map2

9.4 leaflet()

### Specify the color scheme
pal <- colorBin(
  palette = "YlOrRd",
  domain = merged_df$Vision_Total
)

### Specify how labels will be displayed
labels <- sprintf(
  "<strong>%s</strong><br/>%g",
  merged_df$COUNTY, merged_df$Vision_Total
) %>% lapply(htmltools::HTML)

### Generate the graph
leaflet(merged_df) %>%
  addTiles() %>% 
  addPolygons(color = "red", weight = 1, dashArray = "3", fillColor = ~pal(Vision_Total),
              highlight = highlightOptions(
                weight = 4,
                color = "red",
                dashArray = "",
                bringToFront = TRUE),
              label = labels,
              labelOptions = labelOptions(
                style = list("font-weight" = "normal", padding = "3px 8px"),
                textsize = "15px",
                direction = "auto")) %>% 
  addLegend(position = c("bottomright"),pal = pal, values = ~Vision_Total)

We are done!!

...

Senior Data Analyst

Related