Likert histogram

Used to study the distribution of responses to a Likert scale.

Previous Next

The chart

Stata

R

The code

Stata

* Build a 500-row dummy dataset (delete this section and import your own data)
clear
set obs 500
gen rand = runiform()
* Every row starts in category 1 and is moved up to category k (k = 2..5)
* whenever its uniform draw reaches (k-1)/5, which yields five roughly
* equally likely response categories.
gen outcome_var = 1
foreach level of numlist 2/5 {
	replace outcome_var = `level' if rand >= (`level' - 1)/5
}

* Enter your relevant variable names here
* The local macro var names the Likert outcome variable that is tabulated
* and plotted further down.
local var outcome_var

* Change the graph font here if you want
// graph set window fontface "Times New Roman"

* X-axis categories
* scatter_x holds the category numbers 1-5 in the first five rows of the
* dataset and is missing everywhere else.
gen scatter_x = .
replace scatter_x = _n in 1/5

* X-axis label
* xlab1 and xlab5 are the anchor texts for the two ends of the scale. They are
* embedded in xlab through compound double quotes so that ticks 1 and 5 carry
* the number plus the anchor text, while ticks 2-4 carry the number only.
local xlab1 "Strongly disagree"
local xlab5 "Strongly agree"
local xlab "1 `""1" "`xlab1'""' 2 "2" 3 "3" 4 "4" 5 `""5" "`xlab5'""'"
* X-axis range: half a unit of padding on either side of categories 1 and 5
local xrange "0.5 5.5"

* Bar labels
* scatter_y holds the share of each category in percent and scatter_y_str the
* text "count (percent%)" shown above the bar. Only rows 1-5 are filled, one
* row per response category.
gen scatter_y = .
gen scatter_y_str = ""
* The denominator (all non-missing responses) is the same for every category,
* so it is counted once here instead of on every pass of the loop.
quietly count if !missing(`var')
local denom `r(N)'
forval i = 1/5{
	quietly count if `var' == `i'
	local num `r(N)'
	* Share in percent, rounded to one decimal place (0.2347 becomes 23.5)
	local loc = 100*round(`num'/`denom', 0.001)
	quietly replace scatter_y = `loc'  in `i'
	* The label text shows the percentage without decimals
	quietly replace scatter_y_str = "`num' ("+strofreal(`loc',"%4.0f")+"%)" in `i'
}

* Y-axis labels
* Default axis: one label every 10 points from 0% to 50%. yval records the
* value of the highest label.
local yval 50
local ylab 0 "0%" 10 "10%" 20 "20%" 30 "30%" 40 "40%" 50 "50%"
quietly sum scatter_y
local ymax `r(max)'
* When the tallest bar exceeds 50%, extend the labels up to the next multiple
* of 10. The missing() guard is needed because Stata orders missing values
* above every number, so a missing maximum would otherwise enter this branch.
if !missing(`ymax') & `ymax' > 50{
	local yval = 10*ceil(`ymax'/10)
	* Start at 60 because the 50% label is already in the default list above
	forval i = 60(10)`yval'{
		local ylab `ylab' `i' "`i'%"
	}
}

* Overall mean caption
* Mean of the outcome variable, formatted with two decimals, for the text box
* drawn on the graph.
summarize `var'
local overall_mean : display %4.2f r(mean)


* Likert histogram graph
* Layer 1 draws the bars (percent of responses per category). Layer 2 is an
* invisible scatter whose marker labels print "count (percent%)" above each bar.
*
* Vertical position of the Mean text box: 10 points below the highest y-axis
* label (40 on the default 0-50% axis), so the box moves up together with the
* axis when a bar exceeds 50%.
local mean_ypos = `yval' - 10
* Top of the y scale: 5 points above the highest label (55 on the default
* axis), which leaves room for the labels printed above the bars.
local ytop = `yval' + 5
twoway ///
	(histogram `var', discrete width(1) percent fcolor(navy) lcolor(black) gap(20)) ///
	(scatter scatter_y scatter_x, mlabel(scatter_y_str) msymbol(i) mlabsize(medium) mlabcolor(black) mlabposition(12)) ///
	, ///
	text(`mean_ypos' 3 "Mean: `overall_mean'", box fcolor(none) lcolor(black) size(medlarge) margin(small)) ///
	xtitle("") xlabel(`xlab', labsize(medlarge) noticks) ///
	xscale(range(`xrange')) ///
	ylabel(`ylab', labsize(large) angle(horizontal) noticks labgap(2)) ///
	yscale(range(0 `ytop')) ///
	ytitle("") ///
	title("Graph Title") ///
	scheme(s1color) ///
	legend(off) ///
	plotregion(margin(zero) style(none))
* Save the graph as a PNG in the current working directory
graph export "Likert histogram graph.png", replace

R

# Likert histogram

################################ Initial Setup #################################
# Install required packages if they are not already in your system
packages <- c("tidyverse")

# requireNamespace() tests whether each package can actually be loaded, which
# is more precise than searching every cell of the installed.packages() matrix
# for the package name. A plain loop is used because only the side effect
# (installing) matters, not a return value.
for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE,
                     repos = "https://cran.rstudio.com/")
  }
}

# Loading required packages
library("tidyverse")

# Setting working directory
# NOTE: this path is specific to the original author's machine. Change it to
# your own project folder; the export path at the end of the script is
# relative to it.
setwd("~/Dropbox (IDinsight)/Data visualization library")

################################ Loading dataset ###############################
# Generating dummy data: 750 responses drawn with replacement from the values
# 1 to 5. The seed is fixed so that the example is reproducible.
# (Delete this section and import your own data.)
set.seed(1)
mydata <- data.frame(
  outcome_variable = sample(1:5, 750, replace = TRUE)
)

################################ Data processing ###############################

# Creating the bins for the columns: one row per response category with its
# count (n) and its share of all non-missing responses in percent (freq).
my_data_processed <- mydata %>%
  # Missing responses are dropped so that they neither form a bar of their own
  # nor enter the denominator of the percentages.
  filter(!is.na(outcome_variable)) %>%
  group_by(outcome_variable) %>%
  summarise(n = n()) %>%
  # Categories that nobody selected are added back with a count of zero, so
  # the x-axis always shows all five points of the scale.
  complete(outcome_variable = 1:5, fill = list(n = 0L)) %>%
  mutate(freq = n / sum(n) * 100)

# Storing the overall mean in a variable. This will be displayed on the plot.
# na.rm = TRUE keeps a single missing response from turning the mean into NA.
overall_mean <- round(mean(mydata$outcome_variable, na.rm = TRUE), 2)

############################## Creating the graph ##############################

# Height of the tallest bar; used to size the y-axis and to place the mean box
max_freq <- max(my_data_processed$freq)

# Highest y-axis tick: 50% by default, extended to the next multiple of 10
# when a bar is taller than 50%
top_break <- max(50, 10 * ceiling(max_freq / 10))

plot <- my_data_processed %>%

  # Setting aesthetics which will be inherited by other geometric objects.
  # They refer to columns of the piped-in data, so the plot stays consistent
  # with the data frame it is built from. The category is converted to a
  # factor to get a discrete x-axis.
  ggplot(aes(x = factor(outcome_variable), y = freq)) +

  # geom_col() makes the heights of the bars represent values in the data
  # (here the pre-computed percentages). geom_bar() would by default count the
  # number of rows in each group instead.
  # fill (color inside that bar) is set to the IDinsight brand color
  # color (bar outline) is set as black
  geom_col(width = 0.65, fill = "#264D96", color = "black") +

  # Changing the x-axis label for the 1st and 5th bars
  # \n is the new line character
  scale_x_discrete(labels = c("1" = "1\nStrongly disagree",
                              "5" = "5\nStrongly agree")) +

  # Changing the y-axis labels to include the percentage sign (%)
  # Breaks determine the label ticks (every 10 points up to top_break)
  # Limit determines the length of the y-axis
  # Labels are generated from the breaks, so both always have the same length
  # Expand with (0, 0) removes space between the y-axis and the data.
  scale_y_continuous(expand = c(0, 0),
                     limits = c(0, max_freq + 30),
                     breaks = seq(0, top_break, by = 10),
                     labels = function(b) paste0(b, "%")) +

  # The bar label is displayed 1 unit above the frequency to keep space on top
  # of the bars.
  # The label, which shows the count and then the percentage, is created using
  # the paste0 function. The label is then formatted to be in the IDinsight
  # brand color.
  geom_text(aes(y = freq + 1,
                label = paste0(n, " (", round(freq, 1), "%)")),
            color = "#264D96") +

  # This creates the box with the overall mean, centered over the third
  # category and 20 units above the tallest bar.
  annotate(geom = "label", x = 3, y = max_freq + 20,
           label = paste0("Mean: ", overall_mean),
           size = 5,
           color = "#264D96") +

  # Graph title
  labs(title = "Graph Title")

############################# Formatting the graph #############################
# The themed graph is stored in its own object so that exactly this object can
# be displayed and passed to ggsave() below, instead of relying on whichever
# plot ggplot2 happened to touch last.
final_plot <- plot +
  theme_classic() +
  # The following visual elements have been changed:
    # Changed the overall font to Inter
    #   (NOTE: the Inter font presumably has to be installed and visible to
    #   the graphics device; otherwise a fallback font is used)
    # Removed axis titles
    # Resized and changed the color of the axis text
    # Removed axis ticks
    # Changed the size of the axis line
    #   (NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
    #   `size` is kept here so the script also runs on older versions)
    # Centered and changed the color of the plot title
  theme(text = element_text(family = "Inter"),
        axis.title = element_blank(),
        axis.text = element_text(size = 11, colour = "#264D96"),
        axis.ticks = element_blank(),
        axis.line = element_line(size = 0.4),
        plot.title = element_text(hjust = 0.5, color = "#264D96",
                                  size = 16))

# Display the graph. The explicit print() makes it appear even when the script
# is run through source(), where top-level values are not printed.
print(final_plot)

############################# Saving and exporting #############################
# Indicating the export folder (relative to the working directory) and the
# image file name
export_folder <- "R/Bar graphs/Exports/"
img_name <- "histogram_likert_reviewed.png"
ggsave(paste0(export_folder, img_name), plot = final_plot)

Other details

R

Code written by Arkadeep Bandyopadhyay and reviewed by Sandra Alemayehu.
Colors for the graph have been selected from IDinsight’s brand guide.