#install.packages("tidyverse")
library(tidyverse) # A family of packages used to clean, process, model, and visualize data
# install.packages("sf")
library(sf) # Offers Support for simple features, a standardized way to encode spatial vector data. Binds to 'GDAL' for reading and writing data, to 'GEOS' for geometrical operations, and to 'PROJ' for projection conversions and datum transformations.
# install.packages("devtools")
# devtools::install_github("Shelmith-Kariuki/rKenyaCensus")
library(rKenyaCensus) # Contains the 2019 Kenya Census data
# install.packages("ggplot2")
library(ggplot2) # Used for creating amazing pretty graphs in R
# install.packages("tmap") #Thematic maps are geographical maps in which spatial data distributions are visualized.
library(tmap)
# install.packages("leaflet") # Used for creating interactive maps
library(leaflet)
## Method 1
# KenyaSHP <- st_read("../Kenya counties/Kenya_counties.shp", quiet = TRUE,
# stringsAsFactors = FALSE, as_tibble = TRUE)
## Method 2
KenyaSHP <- read_sf("../Kenya counties/Kenya_counties.shp", quiet = TRUE,
stringsAsFactors = FALSE, as_tibble = TRUE)
## To easily view the shapefile in RStudio View pane, you can drop the geometry column and view the rest of the data.
##View(KenyaSHP %>% head())
## View(KenyaSHP %>% st_drop_geometry())
head(KenyaSHP)
## Simple feature collection with 6 features and 8 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 33.99281 ymin: 0.1837641 xmax: 41.90626 ymax: 5.430648
## geographic CRS: WGS 84
## # A tibble: 6 x 9
## OBJECTID AREA PERIMETER COUNTY3_ COUNTY3_ID COUNTY Shape_Leng Shape_Area
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1 5.68 15.0 2 1 Turka… 15.0 5.68
## 2 2 6.18 12.0 3 2 Marsa… 12.0 6.18
## 3 3 2.12 7.36 4 3 Mande… 7.36 2.12
## 4 4 4.61 9.84 5 4 Wajir 9.84 4.61
## 5 5 0.74 5.03 6 5 West … 5.03 0.740
## 6 6 1.71 8.31 7 6 Sambu… 8.31 1.71
## # … with 1 more variable: geometry <MULTIPOLYGON [°]>
colnames(KenyaSHP)
## [1] "OBJECTID" "AREA" "PERIMETER" "COUNTY3_" "COUNTY3_ID"
## [6] "COUNTY" "Shape_Leng" "Shape_Area" "geometry"
#names(KenyaSHP)
class(KenyaSHP)
## [1] "sf" "tbl_df" "tbl" "data.frame"
glimpse(KenyaSHP)
## Rows: 47
## Columns: 9
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ AREA <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060, 0.877, 0.…
## $ PERIMETER <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.181, 5.964,…
## $ COUNTY3_ <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
## $ COUNTY3_ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ COUNTY <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West Pokot", "…
## $ Shape_Leng <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.030271, 8.3110…
## $ Shape_Area <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, 0.74048058…
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MULTIPOLYGON …
KenyaSHP_geometry <- st_geometry(KenyaSHP)
## View one geometry entry
KenyaSHP_geometry[[1]]
class(KenyaSHP_geometry) #sfc, the list-column with the geometries for each feature
## [1] "sfc_MULTIPOLYGON" "sfc"
class(KenyaSHP_geometry[[1]]) #sfg, the feature geometry of an individual simple feature
## [1] "XY" "MULTIPOLYGON" "sfg"
Wikipedia: Map projections try to portray the surface of the earth or a portion of the earth on a flat piece of paper or computer screen. A coordinate reference system (CRS) then defines, with the help of coordinates, how the two-dimensional, projected map in your GIS is related to real places on the earth.
## This line is not necessary since the shapefile is already in the WGS 84 projection.
KenyaSHP <- st_transform(KenyaSHP, crs = 4326)
## Latitude/Longitude: WGS84 (EPSG: 4326)Commonly used by organizations that provide GIS data for the entire globe or many countries. CRS used by Google Earth
## Inspect the co-ordinate reference system
st_crs(KenyaSHP)
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
We will use V4_T2.27: Distribution of Persons with Disability by Type of Disability, Sex, Area of Residence, County and Sub County
Disability_df <- V4_T2.27
## Inspect the county names of the disability data
counties_Disability_df <- unique(Disability_df$County)
## Inspect the county names of the shape file
counties_KenyaSHP <- KenyaSHP %>%
st_drop_geometry() %>%
select(COUNTY) %>%
pull() %>%
unique()
## Convert the disability county names to title case
Disability_df <- Disability_df %>%
ungroup() %>%
mutate(County = tools::toTitleCase(tolower(County)))
## Inspect the county names of the disability data again
counties_Disability_df <- unique(Disability_df$County)
## Inspect the county names that are different in each of the datasets
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
## [1] "Xxx" "Taita/Taveta" "Tharaka-Nithi" "Elgeyo/Marakwet"
## [5] "Nairobi City"
## Clean the county names so that they match in both datasets
Disability_df <- Disability_df %>%
mutate(County = ifelse(County == "Taita/Taveta", "Taita Taveta",
ifelse(County == "Tharaka-Nithi", "Tharaka",
ifelse(County == "Elgeyo/Marakwet", "Keiyo-Marakwet",
ifelse(County == "Nairobi City", "Nairobi", County)))))
## Inspect the county names again to ensure that they now match.
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
## [1] "Xxx"
Disability_df2 <- Disability_df %>%
filter(AdminArea == "County") %>%
select(-AdminArea, -SubCounty)
## unique(Disability_df2$County)[which(!unique(Disability_df2$County) %in% counties_KenyaSHP)]
## counties_KenyaSHP[which(!counties_KenyaSHP %in% unique(Disability_df2$County))]
## Rename the COUNTY variable, to match the variable name in the shapefile data
Disability_df2 <- Disability_df2 %>%
rename(COUNTY = County)
## Ensure that there are no leading or trailing spaces in the county variable, in both datasets.
KenyaSHP$COUNTY <- trimws(KenyaSHP$COUNTY)
Disability_df2$COUNTY <- trimws(Disability_df2$COUNTY)
## Merge the data
merged_df <- left_join(KenyaSHP, Disability_df2, by = "COUNTY")
## Sort the data so that the County variable appears first
merged_df <- merged_df %>%
select(COUNTY, everything())
## View the data
# View(merged_df)
# View(merged_df %>% st_drop_geometry())
## Class of the merged data
class(merged_df)
## [1] "sf" "tbl_df" "tbl" "data.frame"
## Column names
colnames(merged_df)
## [1] "COUNTY" "OBJECTID" "AREA"
## [4] "PERIMETER" "COUNTY3_" "COUNTY3_ID"
## [7] "Shape_Leng" "Shape_Area" "geometry"
## [10] "Vision_Total" "Vision_Male" "Vision_Female"
## [13] "Hearing_Total" "Hearing_Male" "Hearing_Female"
## [16] "Mobility_Total" "Mobility_Male" "Mobility_Female"
## [19] "Cognition_Total" "Cognition_Male" "Cognition_Female"
## [22] "SelfCare_Total" "SelfCare_Male" "SelfCare_Female"
## [25] "Communication_Total" "Communication_Male" "Communication_Female"
## Variable types
glimpse(merged_df)
## Rows: 47
## Columns: 27
## $ COUNTY <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West…
## $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ AREA <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060,…
## $ PERIMETER <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.1…
## $ COUNTY3_ <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ COUNTY3_ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ Shape_Leng <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.0302…
## $ Shape_Area <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, …
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MUL…
## $ Vision_Total <dbl> 2907, 1137, 1736, 830, 1909, 1302, 1040, 2757, 1…
## $ Vision_Male <dbl> 1308, 516, 894, 447, 862, 511, 402, 1214, 663, 2…
## $ Vision_Female <dbl> 1599, 621, 842, 383, 1047, 791, 638, 1543, 796, …
## $ Hearing_Total <dbl> 1817, 725, 1696, 1005, 1527, 908, 566, 1690, 934…
## $ Hearing_Male <dbl> 903, 336, 855, 543, 733, 416, 265, 794, 469, 127…
## $ Hearing_Female <dbl> 914, 389, 841, 462, 794, 492, 301, 896, 465, 145…
## $ Mobility_Total <dbl> 3184, 1216, 2526, 1629, 2271, 1096, 980, 3805, 2…
## $ Mobility_Male <dbl> 1628, 582, 1282, 877, 1051, 521, 446, 1732, 974,…
## $ Mobility_Female <dbl> 1556, 634, 1243, 752, 1220, 575, 534, 2073, 1076…
## $ Cognition_Total <dbl> 2071, 621, 2187, 1270, 1269, 612, 509, 1854, 973…
## $ Cognition_Male <dbl> 1093, 303, 1117, 664, 593, 301, 248, 851, 445, 1…
## $ Cognition_Female <dbl> 978, 318, 1069, 606, 676, 311, 261, 1003, 528, 2…
## $ SelfCare_Total <dbl> 2230, 619, 2404, 1392, 1249, 680, 498, 1799, 980…
## $ SelfCare_Male <dbl> 1163, 299, 1219, 751, 587, 318, 234, 863, 486, 1…
## $ SelfCare_Female <dbl> 1067, 320, 1184, 641, 662, 362, 264, 936, 494, 1…
## $ Communication_Total <dbl> 1429, 451, 1486, 760, 1030, 448, 358, 1217, 655,…
## $ Communication_Male <dbl> 777, 230, 770, 422, 524, 234, 200, 639, 372, 139…
## $ Communication_Female <dbl> 652, 221, 715, 338, 506, 214, 158, 577, 283, 112…
We are going to plot a base plot map.
plot(KenyaSHP$geometry, lty = 3, col = "brown")
map1 <- ggplot(data = merged_df)+
geom_sf(aes(geometry = geometry, fill = Vision_Total))+
theme_void()+
labs(title = "Distribution of Population with Vision Disability",
caption = "By: @Shel_Kariuki")+
theme(plot.title = element_text(family = "URW Palladio L, Italic",
size = 16, hjust = 0.5),
legend.title = element_blank(),
plot.caption = element_text(family = "URW Palladio L, Italic",size = 12))+
scale_fill_gradient(low = "darkgreen", high = "red")
#scale_fill_viridis_c()
map1
## Set the mode
tmap_mode("view") #Set tmap mode to static plotting or interactive viewing
#tmap_mode("plot")
## Generate the map
## tm_shape(): Specifies the data
## tm_fill(): fills the polygons. Either a fixed color is used, or a color palette is mapped to a data variable.
## tm_borders(): draws the borders of the polygons
## tm_layout(): controls the map layout (title, margins, aspect ratio, colors, frame, legend, among many other things)
## tm_scale_bar(): inserts a scale
## tm_compass(): inserts a compass
map2 <- tm_shape(merged_df) +
tm_fill("Vision_Total",palette="Greens",
title="Distribution of Population with Vision Disability",
id = "COUNTY") +
tm_borders(col = "red",lty = 3)+
tm_layout(title = "Distribution of Population with Vision Disability",
legend.position = c("left", "bottom"))+
tm_scale_bar() +
tm_compass(position = c("right", "top"))
map2
## Specify the color palette
pal <- colorBin(
palette = "YlOrRd",
domain = merged_df$Vision_Total
)
## Modify the look and feel of the labels
labels <- sprintf(
"<strong>%s</strong><br/>%g",
merged_df$COUNTY,
merged_df$Vision_Total) %>%
lapply(htmltools::HTML)
## Generate the map
leafMap <- leaflet(merged_df) %>%
addTiles() %>%
addPolygons(color = "red",
weight = 1,
dashArray = "3",
fillColor = ~pal(Vision_Total),
highlight = highlightOptions(
weight = 4,
color = "red",
dashArray = "",
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(position = c("bottomright"),pal = pal, values = ~Vision_Total)
leafMap
We are done!!