1. Load the libraries required

#install.packages("tidyverse")
library(tidyverse) # A family of packages used to clean, process, model, and visualize data

# install.packages("sf")
library(sf) # Offers Support for simple features, a standardized way to encode spatial vector data. Binds to 'GDAL' for reading and writing data, to 'GEOS' for geometrical operations, and to 'PROJ' for projection conversions and datum transformations.

# install.packages("devtools")
# devtools::install_github("Shelmith-Kariuki/rKenyaCensus")
library(rKenyaCensus) # Contains the 2019 Kenya Census data

# install.packages("ggplot2") 
library(ggplot2) # Used for creating amazing pretty graphs in R

# install.packages("tmap") #Thematic maps are geographical maps in which spatial data distributions are visualized.
library(tmap)

# install.packages("leaflet") # Used for creating interactive maps
library(leaflet)

2. Read in the Kenya boundaries shapefiles

## Method 1
# KenyaSHP <- st_read("../Kenya counties/Kenya_counties.shp", quiet = TRUE,
#                     stringsAsFactors = FALSE, as_tibble = TRUE)

## Method 2
KenyaSHP <- read_sf("../Kenya counties/Kenya_counties.shp", quiet = TRUE, 
                    stringsAsFactors = FALSE, as_tibble = TRUE)


## To easily view the shapefile in RStudio View pane, you can drop the geometry column and view the rest of the data.

##View(KenyaSHP %>% head())

## View(KenyaSHP %>% st_drop_geometry())

2.1 Inspect a few rows

head(KenyaSHP)
## Simple feature collection with 6 features and 8 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 33.99281 ymin: 0.1837641 xmax: 41.90626 ymax: 5.430648
## geographic CRS: WGS 84
## # A tibble: 6 x 9
##   OBJECTID  AREA PERIMETER COUNTY3_ COUNTY3_ID COUNTY Shape_Leng Shape_Area
##      <int> <dbl>     <dbl>    <dbl>      <dbl> <chr>       <dbl>      <dbl>
## 1        1  5.68     15.0         2          1 Turka…      15.0       5.68 
## 2        2  6.18     12.0         3          2 Marsa…      12.0       6.18 
## 3        3  2.12      7.36        4          3 Mande…       7.36      2.12 
## 4        4  4.61      9.84        5          4 Wajir        9.84      4.61 
## 5        5  0.74      5.03        6          5 West …       5.03      0.740
## 6        6  1.71      8.31        7          6 Sambu…       8.31      1.71 
## # … with 1 more variable: geometry <MULTIPOLYGON [°]>

2.2 Inspect the column names

colnames(KenyaSHP)
## [1] "OBJECTID"   "AREA"       "PERIMETER"  "COUNTY3_"   "COUNTY3_ID"
## [6] "COUNTY"     "Shape_Leng" "Shape_Area" "geometry"
#names(KenyaSHP)

2.3 Inspect the class of the shapefile

class(KenyaSHP)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"

2.4 Look at the variable data types

glimpse(KenyaSHP)
## Rows: 47
## Columns: 9
## $ OBJECTID   <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ AREA       <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060, 0.877, 0.…
## $ PERIMETER  <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.181, 5.964,…
## $ COUNTY3_   <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
## $ COUNTY3_ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ COUNTY     <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West Pokot", "…
## $ Shape_Leng <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.030271, 8.3110…
## $ Shape_Area <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, 0.74048058…
## $ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MULTIPOLYGON …

2.5 View the geometry column

KenyaSHP_geometry <- st_geometry(KenyaSHP)

## View one geometry entry
KenyaSHP_geometry[[1]]

2.6 Geometry columns have their own class

class(KenyaSHP_geometry) #sfc, the list-column with the geometries for each feature
## [1] "sfc_MULTIPOLYGON" "sfc"
class(KenyaSHP_geometry[[1]]) #sfg, the feature geometry of an individual simple feature
## [1] "XY"           "MULTIPOLYGON" "sfg"

3. Change the projection of the shapefiles

Wikipedia: Map projections try to portray the surface of the earth or a portion of the earth on a flat piece of paper or computer screen. A coordinate reference system (CRS) then defines, with the help of coordinates, how the two-dimensional, projected map in your GIS is related to real places on the earth.

## This line is not necessary since the shapefile is already in the WGS 84 projection.

KenyaSHP <- st_transform(KenyaSHP, crs = 4326)

## Latitude/Longitude: WGS84 (EPSG: 4326)Commonly used by organizations that provide GIS data for the entire globe or many countries. CRS used by Google Earth

## Inspect the co-ordinate reference system
st_crs(KenyaSHP)
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

4. Load the data that we are going to map

We will use V4_T2.27: Distribution of Persons with Disability by Type of Disability, Sex, Area of Residence, County and Sub County

Disability_df <- V4_T2.27

5. Clean the data, so that the counties match those in the shapefile.

## Inspect the county names of the disability data
counties_Disability_df <- unique(Disability_df$County)

## Inspect the county names of the shape file
counties_KenyaSHP <- KenyaSHP %>% 
  st_drop_geometry() %>% 
  select(COUNTY) %>% 
  pull() %>%
  unique()

## Convert the disability county names to title case
Disability_df <- Disability_df %>% 
  ungroup() %>% 
  mutate(County = tools::toTitleCase(tolower(County)))

## Inspect the county names of the disability data again 
counties_Disability_df <- unique(Disability_df$County)

## Inspect the county names that are different in each of the datasets
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
## [1] "Xxx"             "Taita/Taveta"    "Tharaka-Nithi"   "Elgeyo/Marakwet"
## [5] "Nairobi City"
## Clean the county names so that they match in both datasets
Disability_df <- Disability_df %>% 
  mutate(County = ifelse(County == "Taita/Taveta", "Taita Taveta",
                  ifelse(County == "Tharaka-Nithi", "Tharaka",
                  ifelse(County == "Elgeyo/Marakwet", "Keiyo-Marakwet",
                  ifelse(County == "Nairobi City", "Nairobi", County)))))


## Inspect the county names again to ensure that they now match.
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
## [1] "Xxx"

6. Drop variables that we do not need, and filter the data to only be left with the Counties

Disability_df2 <- Disability_df %>% 
  filter(AdminArea == "County") %>% 
  select(-AdminArea, -SubCounty)

## unique(Disability_df2$County)[which(!unique(Disability_df2$County) %in% counties_KenyaSHP)]

## counties_KenyaSHP[which(!counties_KenyaSHP  %in% unique(Disability_df2$County))]

7. Join the shapefile and the data

## Rename the COUNTY variable, to match the variable name in the shapefile data
Disability_df2 <- Disability_df2 %>% 
  rename(COUNTY = County)

## Ensure that there are no leading or trailing spaces in the county variable, in both datasets.
KenyaSHP$COUNTY <- trimws(KenyaSHP$COUNTY)
Disability_df2$COUNTY <- trimws(Disability_df2$COUNTY)

## Merge the data
merged_df <- left_join(KenyaSHP, Disability_df2, by = "COUNTY")

## Sort the data so that the County variable appears first
merged_df <- merged_df %>% 
  select(COUNTY, everything())

8. Inspect the merged data

## View the data
#  View(merged_df)
#  View(merged_df %>% st_drop_geometry())

## Class of the merged data
class(merged_df)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"
## Column names
colnames(merged_df)
##  [1] "COUNTY"               "OBJECTID"             "AREA"                
##  [4] "PERIMETER"            "COUNTY3_"             "COUNTY3_ID"          
##  [7] "Shape_Leng"           "Shape_Area"           "geometry"            
## [10] "Vision_Total"         "Vision_Male"          "Vision_Female"       
## [13] "Hearing_Total"        "Hearing_Male"         "Hearing_Female"      
## [16] "Mobility_Total"       "Mobility_Male"        "Mobility_Female"     
## [19] "Cognition_Total"      "Cognition_Male"       "Cognition_Female"    
## [22] "SelfCare_Total"       "SelfCare_Male"        "SelfCare_Female"     
## [25] "Communication_Total"  "Communication_Male"   "Communication_Female"
## Variable types
glimpse(merged_df)
## Rows: 47
## Columns: 27
## $ COUNTY               <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West…
## $ OBJECTID             <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ AREA                 <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060,…
## $ PERIMETER            <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.1…
## $ COUNTY3_             <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ COUNTY3_ID           <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ Shape_Leng           <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.0302…
## $ Shape_Area           <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, …
## $ geometry             <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MUL…
## $ Vision_Total         <dbl> 2907, 1137, 1736, 830, 1909, 1302, 1040, 2757, 1…
## $ Vision_Male          <dbl> 1308, 516, 894, 447, 862, 511, 402, 1214, 663, 2…
## $ Vision_Female        <dbl> 1599, 621, 842, 383, 1047, 791, 638, 1543, 796, …
## $ Hearing_Total        <dbl> 1817, 725, 1696, 1005, 1527, 908, 566, 1690, 934…
## $ Hearing_Male         <dbl> 903, 336, 855, 543, 733, 416, 265, 794, 469, 127…
## $ Hearing_Female       <dbl> 914, 389, 841, 462, 794, 492, 301, 896, 465, 145…
## $ Mobility_Total       <dbl> 3184, 1216, 2526, 1629, 2271, 1096, 980, 3805, 2…
## $ Mobility_Male        <dbl> 1628, 582, 1282, 877, 1051, 521, 446, 1732, 974,…
## $ Mobility_Female      <dbl> 1556, 634, 1243, 752, 1220, 575, 534, 2073, 1076…
## $ Cognition_Total      <dbl> 2071, 621, 2187, 1270, 1269, 612, 509, 1854, 973…
## $ Cognition_Male       <dbl> 1093, 303, 1117, 664, 593, 301, 248, 851, 445, 1…
## $ Cognition_Female     <dbl> 978, 318, 1069, 606, 676, 311, 261, 1003, 528, 2…
## $ SelfCare_Total       <dbl> 2230, 619, 2404, 1392, 1249, 680, 498, 1799, 980…
## $ SelfCare_Male        <dbl> 1163, 299, 1219, 751, 587, 318, 234, 863, 486, 1…
## $ SelfCare_Female      <dbl> 1067, 320, 1184, 641, 662, 362, 264, 936, 494, 1…
## $ Communication_Total  <dbl> 1429, 451, 1486, 760, 1030, 448, 358, 1217, 655,…
## $ Communication_Male   <dbl> 777, 230, 770, 422, 524, 234, 200, 639, 372, 139…
## $ Communication_Female <dbl> 652, 221, 715, 338, 506, 214, 158, 577, 283, 112…

9. Visualise the data

9.1 plot()

We are going to plot a base plot map.

plot(KenyaSHP$geometry, lty = 3, col = "brown")

9.2 ggplot2()

map1 <- ggplot(data = merged_df)+
          geom_sf(aes(geometry = geometry, fill = Vision_Total))+
          theme_void()+
          labs(title = "Distribution of Population with Vision Disability",
                 caption = "By: @Shel_Kariuki")+
          theme(plot.title = element_text(family = "URW Palladio L, Italic",
                                            size = 16, hjust = 0.5),
                legend.title = element_blank(),
                plot.caption = element_text(family = "URW Palladio L, Italic",size = 12))+
  scale_fill_gradient(low = "darkgreen", high = "red")
  #scale_fill_viridis_c()
           
map1

9.3 tmap()

## Set the mode

tmap_mode("view") #Set tmap mode to static plotting or interactive viewing
#tmap_mode("plot")

## Generate the map
## tm_shape(): Specifies the data
## tm_fill(): fills the polygons. Either a fixed color is used, or a color palette is mapped to a data variable.
## tm_borders(): draws the borders of the polygons
## tm_layout(): controls the map layout (title, margins, aspect ratio, colors, frame, legend, among many other things)
## tm_scale_bar(): inserts a scale
## tm_compass(): inserts a compass

map2 <- tm_shape(merged_df) +
  tm_fill("Vision_Total",palette="Greens",
          title="Distribution of Population with Vision Disability", 
          id = "COUNTY") +
    tm_borders(col = "red",lty = 3)+
  tm_layout(title = "Distribution of Population with Vision Disability",
            legend.position = c("left", "bottom"))+
  tm_scale_bar() +
  tm_compass(position = c("right", "top"))
map2

9.4 leaflet()

## Specify the color palette
pal <- colorBin(
  palette = "YlOrRd",
  domain = merged_df$Vision_Total
)

## Modify the look and feel of the labels
labels <- sprintf(
  "<strong>%s</strong><br/>%g",
  merged_df$COUNTY, 
  merged_df$Vision_Total) %>% 
  lapply(htmltools::HTML)

## Generate the map
leafMap <- leaflet(merged_df) %>%
  addTiles() %>% 
  addPolygons(color = "red", 
              weight = 1, 
              dashArray = "3", 
              fillColor = ~pal(Vision_Total),
              highlight = highlightOptions(
                weight = 4,
                color = "red",
                dashArray = "",
                bringToFront = TRUE),
              label = labels,
              labelOptions = labelOptions(
                style = list("font-weight" = "normal", padding = "3px 8px"),
                textsize = "15px",
                direction = "auto")) %>% 
  addLegend(position = c("bottomright"),pal = pal, values = ~Vision_Total)
leafMap

We are done!!

...