Horizontal bar

Used to compare discrete numerical values across categorical variables

Previous Next

The chart

Stata

R

Data

A sample dataset in .dta format can be downloaded here. The R code uses the same dataset in .csv format, which can be found here.

The code

Stata

*_______________________________________________________________________________

* Project       : IDinsight data visualization guide - Stata graph templates
*
* Graph Type    : Bar graphs with CIs, grouping outcomes over another variable
*				  Vertical and horizontal

* By            : Crystal Huang
* Last edited	: April 15, 2019
*_______________________________________________________________________________

* Toy dataset
* The two globals below hold absolute, machine-specific folder paths. Edit them
* before running: data is the folder that contains toydata.dta, output is the
* folder that receives the exported graph.

global data "/Users/crystalhuang/Dropbox (IDinsight)/Data Visualization Guide/3. Toy Data"
global output "/Users/crystalhuang/Dropbox (IDinsight)/Data Visualization Guide/2. Graph templates/graphs"

* Load the toy dataset, discarding whatever is currently in memory
use "$data/toydata.dta", clear

* Set IDinsight scheme and font
* The idinsight scheme is not shipped with Stata; it has to be installed
* beforehand, otherwise set scheme will fail.

set scheme idinsight
graph set window fontface "Arial"

* Graph 2 - Anemia, overweight and high blood pressure prevalence, by region

* Outcome variables to summarise

    local outcomes anemic overweight bp_high

* For every outcome, store the standard error and the lower and upper
* confidence bounds of its regional mean in se_<outcome>, lo_<outcome> and
* hi_<outcome>. The values are constant within a region.

    foreach y of local outcomes {

      generate se_`y' = .
      generate lo_`y' = .
      generate hi_`y' = .

      * one estimation per region, regions are coded 1 to 4
      forvalues i = 1/4 {

        mean `y' if region == `i'

        * rows of r(table) used here: 2 = std. err., 5 = lower, 6 = upper bound
        matrix A = r(table)
        local se = A[2,1]
        local lb = A[5,1]
        local ub = A[6,1]

        replace se_`y' = `se' if region == `i'
        replace lo_`y' = `lb' if region == `i'
        replace hi_`y' = `ub' if region == `i'
      }
    }

* Collapse to one row per region: the bar heights are the outcome means, and
* the se/lo/hi values computed earlier (constant within region) are carried
* along by taking the first observation of each

    collapse (mean) `outcomes' (first) se_* lo_* hi_*, by(region)
    generate id = _n

* Prefix the outcome means with mean_ so that all four statistics follow the
* same stub_outcome naming pattern, which reshape long relies on

    foreach y of local outcomes {
        rename `y' mean_`y'
    }

* Reshape long: one row per region and outcome

    reshape long mean se lo hi, i(id) j(outcome) string

    * the suffix captured in outcome still starts with the separator,
    * so drop the first underscore only
    replace outcome = subinstr(outcome, "_", "", 1)

* Express the proportions in percent and display the bar labels with one decimal

    foreach stat of varlist mean se lo hi {
        replace `stat' = `stat' * 100
    }

    format mean %4.1f

* Create bar positions
* x is the position of each bar along the categorical axis, which is the
* vertical axis here because the bars are horizontal. Each outcome gets a block
* of four bars, one per region, with a gap before the next block:
*   anemic      1-4    midpoint  2.5
*   bp_high     6-9    midpoint  7.5
*   overweight 11-14   midpoint 12.5
* The block order has to match the ylabel option of the graph below, which
* prints Anemic at 2.5, High Blood Pressure at 7.5 and Overweight at 12.5.
* Placing overweight at +5 and bp_high at +10 would swap the two labels.

    gsort outcome +mean
    g x= region if outcome== "anemic"
    replace x= region + 5 if outcome== "bp_high"
    replace x= region + 10 if outcome== "overweight"


* GRAPH: Horizontal bars, grouped by region with CIs
* One bar layer per region so that each region gets its own colour and legend
* entry, rcap draws the confidence interval from lo to hi, and the final
* scatter layer uses invisible markers located at the upper bound to print the
* mean to the right of each interval.

    #delimit;
    twoway  (bar mean x if region==1, horizontal barw(0.50))
            (bar mean x if region==2, horizontal barw(0.50))
            (bar mean x if region==3, horizontal barw(0.50))
            (bar mean x if region==4, horizontal barw(0.50))
            (rcap hi lo x, horizontal lwidth(thin) lcolor(black))
            (scatter x hi, msym(none) mlab(mean) mlabpos(3) mlabgap(1) mlabsize(2) mlabcolor(black)),
            xlabel(0(10)70, format(%4.0f))
            ylabel(2.5 "Anemic" 7.5 `" "High"  "Blood Pressure" "' 12.5 "Overweight", nogrid)
            ytitle("")
            xtitle("Percent %")
            title("Health indicator prevalence, by region")
            legend(region(lcolor(white)) order(1 "North-East" 2 "Mid-West" 3 "South" 4 "West") rows(4) pos(3))
            xscale(lcolor(none)) ;

    graph export "$output/bargraph_horizontal.tif", replace ;
    #delimit cr

R

# Horizontal bar

################################# Initial Setup ################################
# Install required packages if they are not already in your system.
# requireNamespace() reports whether a package can be loaded, which is a more
# precise check than matching the name against every cell of the
# installed.packages() matrix. Packages are downloaded over HTTPS.
packages <- c("tidyverse")

for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE,
                     repos = "https://cran.rstudio.com/")
  }
}

# Loading required packages
library("tidyverse")

# Setting working directory. The relative paths used further down
# ("Data/toydata.csv" and "R/Bar graphs/Exports/") are resolved against this
# folder, so point it at the location of the project on your machine.
setwd("~/Dropbox (IDinsight)/Data visualization library")


############################## Loading dataset #################################

mydata <- read_csv(file = "Data/toydata.csv", show_col_types = FALSE)


############################### Data processing ################################

# Keep the person identifier, the grouping variable and the three health
# indicators, then stack the indicators so that the data has one row per person
# and indicator (wide -> long): the indicator name goes to `disease`, its value
# to `disease_present`.
# Region is converted to a factor at the end of the pipeline so that a specific
# order can be maintained: the levels given below dictate the order of the bars
# and of the legend entries.
mydata_long <- mydata %>%
  select(personid, region, bp_high, anemic, overweight) %>%
  pivot_longer(cols = c(bp_high, anemic, overweight),
               names_to = "disease",
               values_to = "disease_present") %>%
  arrange(personid) %>%
  mutate(region = factor(region,
                         levels = c("North-East", "Mid-West",
                                    "South", "West")))

# Creating the means and CIs

# Significance level: alpha = 0.05 corresponds to a 95% confidence interval
alpha <- 0.05

# Creating means and CIs and storing them in the my_sum tibble (dataframe),
# one row per region and disease:
#   n    : number of non-missing observations. mean() and sd() drop missing
#          values (na.rm = TRUE), so they must not be counted here either;
#          counting them would understate the standard error and use the
#          wrong degrees of freedom.
#   mean : share of observations with the disease
#   sd   : standard deviation of the indicator
#   se   : standard error of the mean
#   ic   : half-width of the two-sided (1 - alpha) confidence interval, based
#          on the t distribution with n - 1 degrees of freedom
# .groups = "drop" returns an ungrouped tibble so that later steps are not
# affected by leftover grouping.
my_sum <- mydata_long %>%
  group_by(region, disease) %>%
  summarise(n = sum(!is.na(disease_present)),
            mean = mean(disease_present, na.rm = TRUE),
            sd = sd(disease_present, na.rm = TRUE),
            .groups = "drop") %>%
  mutate(se = sd / sqrt(n),
         ic = se * qt(1 - alpha / 2, df = n - 1)) %>%
  arrange(disease)


############################## Creating the graph ##############################

# Tick positions of the value axis, in percent. The bar heights are
# proportions, so the axis breaks are these values divided by 100 while the
# tick labels show the percent values themselves. Deriving both from a single
# vector keeps breaks and labels in sync.
pct_ticks <- seq(from = 0, to = 70, by = 10)

plot <- my_sum %>%

  # Setting aesthetics which will be inherited by other geometric objects.
  # The columns of my_sum are referenced by name so that ggplot evaluates them
  # inside the data it receives, instead of relying on free-standing vectors
  # that would have to be kept aligned with the data by hand.
  ggplot(aes(x = disease, y = mean, fill = region)) +

  # "identity" selected for stat since we want the heights of the bars to
  # represent values in the data. The default, "count", makes the height of
  # each bar equal to the number of cases in each group.
  # position dodge applied so that the bars are not superimposed.
  geom_bar(stat = "identity", position = position_dodge(width = 0.7),
           width = 0.5) +

  # Position dodge applied so that the error bars are not superimposed. The
  # dodge width matches the one used for the bars so that each error bar is
  # positioned on top of its own bar.
  # NOTE(review): ggplot2 >= 3.4.0 expects `linewidth` instead of `size` for
  # line thickness and warns about `size`; switch once older ggplot2 versions
  # no longer need to be supported.
  geom_errorbar(aes(ymin = mean - ic, ymax = mean + ic),
                width = 0.15,
                size = 0.5,
                position = position_dodge(width = 0.7)) +

  # Adding the text which will display the mean of the bars. It is positioned
  # 0.035 units beyond (mean + ic), which leaves some space between the error
  # bars and the text.
  # As with the previous geometric objects, position dodge has been applied
  # here as well so that the labels are not superimposed.
  geom_text(aes(y = mean + ic + 0.035, label = formatC(mean*100, digits = 1,
                                                       format = "f")),
            position = position_dodge(width = 0.7),
            size = 2.8) +

  # Setting the legend/scale colors
  scale_fill_manual(values = c("#264D96", "#5480D6", "#A8BFEB", "#DEE5F7")) +

  # Setting the x-axis labels. The labels are named after the values of the
  # disease column, so each label is attached to its own category regardless
  # of the order in which the categories are drawn. The expand option removes
  # the space around the axis and the data. Please feel free to change the
  # values and see how the plot is affected.
  # \n is the new line character, used to create a line break
  scale_x_discrete(labels = c(anemic = "Anemic",
                              bp_high = "High Blood\n Pressure",
                              overweight = "Overweight"),
                   expand = c(0, 0)) +

  # Customizing the y-axis by changing the limits, axis tick values and labels
  scale_y_continuous(expand = c(0, 0),
                     limits = c(0, max(my_sum$mean) + 0.2),
                     breaks = pct_ticks / 100,
                     labels = as.character(pct_ticks)) +

  # Graph title and axis labels
  labs(title = "Health indicator prevalence, by region",
       y = "Percent %") +

  # Flipping the coordinates to create horizontal bars
  coord_flip()

############################# Formatting the graph #############################
# theme_classic() provides the base look. The theme() call then makes the
# following visual changes:
#   - Removed the y-axis title (flipped coordinates)
#   - Made the y-axis line a light shade of gray
#   - Removed the x-axis line
#   - Created x-axis grid lines which are light gray and dotted
#   - Removed axis ticks
#   - Removed the legend title
#   - Vertically aligned and resized the x-axis text
#   - Center aligned and resized the plot title
# NOTE(review): the "Inter" font family is assumed to be available to the
# graphics device - confirm on the machine that renders the chart.
# The finished chart is stored in a variable so that it can be handed to
# ggsave() explicitly further down.
final_plot <- plot +
  theme_classic() +
  theme(text = element_text(family = "Inter"),
        axis.title.y = element_blank(),
        axis.line.y = element_line(color = "gray80"),
        axis.line.x = element_blank(),
        panel.grid.major.x = element_line(colour = "gray90", linetype = "dotted"),
        axis.ticks = element_blank(),
        legend.title = element_blank(),
        axis.text.x = element_text(vjust = 0.5, size = 10),
        plot.title = element_text(hjust = 0.5, size = 12))

# Display the finished chart
final_plot

############################# Saving and exporting #############################
# indicating the export folder and the image file name
export_folder <- "R/Bar graphs/Exports/"
img_name <- "bar_horizontal_R_reviewed.png"

# Make sure the export folder exists; nothing happens if it is already there
dir.create(export_folder, recursive = TRUE, showWarnings = FALSE)

# The plot is passed explicitly instead of relying on ggsave()'s default of
# saving whichever plot was created or modified last.
ggsave(filename = paste0(export_folder, img_name), plot = final_plot)

Other details

Stata

Credit: Crystal Huang

You must have IDinsight styles installed to replicate the chart above.

R

Code written by Arkadeep Bandyopadhyay and reviewed by Sandra Alemayehu.
Colors for the graph have been selected from IDinsight’s brand guide.