The chart
Data
Sample dataset can be downloaded here
The code
Stata
* Working directory: the dataset is read from, and the final PNG is written to,
* this folder (both are referenced below by relative file name).
local project_dir "C:\Users\jeffe\Dropbox (IDinsight)\Technical bootcamp online\Data Visualization\Graph examples"
cd "`project_dir'"
* Optional: uncomment the next line to change the font used in graph windows
// graph set window fontface "Times New Roman"
*****************************
** HH-level group averages **
*****************************
* One pass per polling-station type. The loop variable x (0, then 1) selects the
* ps_type to keep and later picks the panel colour, title and graph name.
* The loop is closed after the twoway command further down.
forvalues x = 0/1 {

* --- Load data and apply sample restrictions ---
use "Voter Rolls Data for Bar Graph", clear
* Unmatched voters (matched == 2) are removed
keep if matched != 2
* Polling station 66 is excluded
keep if psid != 66
* Unmatched dead individuals (type 5 with matched == 1) are removed
keep if !(Cindividual_type == 5 & matched == 1)
* Only the polling-station type handled in this pass is retained
drop if ps_type != `x'

* --- Flag households with at least one matched member ---
* num_matched is 1 for a matched individual (matched == 3), 0 otherwise
gen num_matched = matched == 3
* Number of matched individuals within each census household
bysort Ccensus_hhid: egen total_num_matched = total(num_matched)
* Household indicator: 1 if any member matched, 0 if none
gen hh_matched = total_num_matched != 0 if !missing(total_num_matched)

* --- Collapse to one row per household, carrying value labels across ---
* collapse discards value-label attachments, so remember each label name first
unab collapse_vars: Ccaste Creligion Chh_size Cedu_hh_head_spouse Cassets*
foreach v of local collapse_vars {
    local `v'_lab: value label `v'
}
* Household means of the characteristics and of the match indicator
collapse (mean) `collapse_vars' hh_matched, by(ps_type Ccensus_hhid)
* Re-attach the remembered value labels
foreach v of local collapse_vars {
    label values `v' ``v'_lab'
}
* Prefix every value label with its numeric code
numlabel _all, add
* Calculate match rates by subgroup
* Each rate is saved in a local as the unevaluated expression 100*matched/total;
* it is evaluated later when the value is written into the plotting variable.
* Every spec below is "numeric code, suffix of the local that receives the rate".

* Caste: 4 = General, 3 = OBC, 1 = SC, 2 = ST
foreach spec in "4 gen" "3 obc" "1 sc" "2 st" {
    local grp_code : word 1 of `spec'
    local grp_tag  : word 2 of `spec'
    * Matched households in the group
    count if Ccaste == `grp_code' & hh_matched == 1
    local num `r(N)'
    * All households in the group
    count if Ccaste == `grp_code'
    local match_caste_`grp_tag' 100*`num'/`r(N)'
}

* Religion: 1 = Hindu, 2 = Muslim
foreach spec in "1 hindu" "2 muslim" {
    local grp_code : word 1 of `spec'
    local grp_tag  : word 2 of `spec'
    * Matched households in the group
    count if Creligion == `grp_code' & hh_matched == 1
    local num `r(N)'
    * All households in the group
    count if Creligion == `grp_code'
    local match_religion_`grp_tag' 100*`num'/`r(N)'
}
* Poverty status: Wealth quartile
* Builds a poverty score from household size, education of the female
* head/spouse and eight asset indicators, then splits households into quartiles.
* Every component is stored in a variable ending in _ppi so the wildcard in the
* egen calls below picks them all up.
* Prep
* Household size score: sizes 1-7 get the listed points, 8 and above get 0.
* The max keyword of recode stands for the largest non-missing value of the
* variable, so no prior summarize or r(max) macro is needed for the top bin.
recode Chh_size (1=41) (2=34) (3=26) (4=19) (5=11) (6=7) (7=4) (8/max=0), gen(size_ppi)
* General education level of the female head/spouse score
* -999 and -888 become the extended missing values .d and .r
* NOTE(review): presumably "don't know" and "refused" - confirm against the codebook.
* System-missing education is scored 5
* NOTE(review): presumably households without a female head/spouse - confirm.
recode Cedu_hh_head_spouse (-999=.d) (-888=.r) (3 4 =5) (2 =3) (1 =0) (.=5), gen(female_ppi)
* Asset scores: a value of 1 is replaced by the item's points, the special codes
* become extended missing values, and all other values are left unchanged
recode Cassets_refrigerator (-999=.d) (-888=.r) (1=11), gen(refrigerator_ppi)
recode Cassets_stove (-999=.d) (-888=.r) (1=2), gen(stove_ppi)
recode Cassets_pressure (-999=.d) (-888=.r) (1=4), gen(cook_ppi)
recode Cassets_television (-999=.d) (-888=.r) (1=5), gen(tv_ppi)
recode Cassets_fan (-999=.d) (-888=.r) (1=3), gen(fan_ppi)
recode Cassets_dressing (-999=.d) (-888=.r) (1=4), gen(dressing_almirah_ppi)
recode Cassets_table (-999=.d) (-888=.r) (1=6), gen(furniture_ppi)
recode Cassets_moto (-999=.d) (-888=.r) (1=19), gen(vehicle_ppi)
* Final calculations
* Sum of all component scores, and the number of components that are missing
egen ppi_score_raw=rowtotal(*_ppi), missing
egen ppi_score_miss=rowmiss(*_ppi)
* Reset ppi_score_raw to missing if any of the constituent parts are missing
replace ppi_score_raw=.m if ppi_score_miss>0
* Classify by quartile (households with a missing score get no quartile)
xtile ppi_quartile = ppi_score_raw, nq(4)
label define ppi_quartile 4 "Top 25%" 3 "50-75%" 2 "25-50%" 1 "Bottom 25%"
label values ppi_quartile ppi_quartile
label variable ppi_quartile "Wealth quartile"
* Match rate per wealth quartile, from the top quartile (4) down to the bottom (1).
* Word q of the list below is the suffix of the local that receives the rate
* for quartile q; the rate is stored as the unevaluated expression
* 100*matched/total.
forvalues q = 4(-1)1 {
    local q_tag : word `q' of bottom25 25to50 50to75 top25
    * Matched households in the quartile
    count if ppi_quartile == `q' & hh_matched == 1
    local num `r(N)'
    * All households in the quartile
    count if ppi_quartile == `q'
    local match_wealth_`q_tag' 100*`num'/`r(N)'
}
*************************************
** Individual-level group averages **
*************************************
* Reload the full file: the household collapse above discarded the
* individual-level rows. The same sample restrictions are applied again.
use "Voter Rolls Data for Bar Graph", clear
* Unmatched voters (matched == 2) are removed
keep if matched != 2
* Polling station 66 is excluded
keep if psid != 66
* Unmatched dead individuals (type 5 with matched == 1) are removed
keep if !(Cindividual_type == 5 & matched == 1)
* Individual match indicator: 1 if matched == 3, 0 otherwise
gen indiv_matched = matched == 3
* Only the polling-station type handled in this pass is retained
drop if ps_type != `x'
* Calculate match rates by subgroup
* Each rate is saved in a local as the unevaluated expression 100*matched/total.

* Age groups: the condition defining each band is held in a local so that the
* numerator and the denominator are guaranteed to use the same definition.
* Missing ages are excluded explicitly because missing compares as larger than
* any number.
local age_if_below30 Cage <= 30 & !missing(Cage)
local age_if_30to50  Cage > 30 & Cage <= 50 & !missing(Cage)
local age_if_above50 Cage > 50 & !missing(Cage)
foreach band in below30 30to50 above50 {
    * Matched individuals in the band
    count if `age_if_`band'' & indiv_matched == 1
    local num `r(N)'
    * All individuals in the band
    count if `age_if_`band''
    local match_age_`band' 100*`num'/`r(N)'
}

* Gender: 0 = male, 1 = female
foreach spec in "0 male" "1 female" {
    local sex_code : word 1 of `spec'
    local sex_tag  : word 2 of `spec'
    * Matched individuals of this gender
    count if Cgender == `sex_code' & indiv_matched == 1
    local num `r(N)'
    * All individuals of this gender
    count if Cgender == `sex_code'
    local match_gender_`sex_tag' 100*`num'/`r(N)'
}
* Graph
* The first rows of the dataset in memory are reused as scratch space for the
* plot: row k holds the bar drawn at axis position k.
* Create bar order
gen bar = _n if _n < 26
* Match rates
gen match = .
format match %4.0f
* Layout of the category axis (positions without a bar stay missing):
*    1-3   age bands             4  age subheader            5  space
*    6-7   gender                8  gender subheader
*    9     individual-level header                          10  space
*   11-14  wealth quartiles     15  wealth subheader        16  space
*   17-18  religion             19  religion subheader      20  space
*   21-24  caste                25  caste subheader
*   26     household-level header
* Word i of slot_rows is the position that receives the rate named by word i
* of slot_groups.
local slot_rows   1 2 3 6 7 11 12 13 14 17 18 21 22 23 24
local slot_groups age_above50 age_30to50 age_below30                            ///
                  gender_female gender_male                                     ///
                  wealth_bottom25 wealth_25to50 wealth_50to75 wealth_top25      ///
                  religion_muslim religion_hindu                                ///
                  caste_st caste_sc caste_obc caste_gen
local n_slots : word count `slot_rows'
forvalues i = 1/`n_slots' {
    local slot_row : word `i' of `slot_rows'
    local slot_grp : word `i' of `slot_groups'
    * The stored expression 100*matched/total is evaluated here
    replace match = `match_`slot_grp'' if _n == `slot_row'
}
* Bar color and graph title for this pass:
* type 0 is drawn in navy and titled Rural, type 1 in half-intensity navy and
* titled Urban.
if `x' == 0 {
    local c navy
    local graphtitle "Rural"
}
else {
    local c navy*0.5
    local graphtitle "Urban"
}
* Create bar graph
* Text shown at each position of the category axis: group names next to the
* bars, italic subheaders above each group, bold headers above each level.
local row_labels                                                                      ///
    1 "Above 50" 2 "30 to 50" 3 "18 to 30" 4 "{it:Age (years)}"                       ///
    6 "Female" 7 "Male" 8 "{it:Gender}" 9 "{bf:Individual-level}"                     ///
    11 "Bottom 25%" 12 "25-50%" 13 "50-75%" 14 "Top 25%" 15 "{it:Wealth quartile}"    ///
    17 "Muslim" 18 "Hindu" 19 "{it:Religious group}"                                  ///
    21 "ST" 22 "SC" 23 "OBC" 24 "General" 25 "{it:Caste group}" 26 "{bf:Household-level}"
* Two layers: horizontal bars, plus an invisible-marker scatter whose marker
* labels print the rate at the end of each bar. The panel is kept in memory
* under a name ending in the polling-station type so it can be combined later.
twoway                                                                                ///
    (bar match bar, horizontal barw(0.50) fcolor(`c') lcolor(black))                  ///
    (scatter bar match, msym(none) mlab(match) mlabsize(*0.8) mlabcolor(black))       ///
    ,                                                                                 ///
    title("{bf:`graphtitle'}", size(large))                                           ///
    ylabel(`row_labels', nogrid labgap(*0.5) labsize(small) angle(horizontal))        ///
    ytitle("")                                                                        ///
    yscale(range(0 26))                                                               ///
    xlabel(0(20)100, glwidth(thin) glpattern(solid) glcolor(gs14) nogmax)             ///
    xtitle("{bf: Match Rates (%)}", size(medsmall))                                   ///
    legend(off)                                                                       ///
    plotregion(margin(zero) style(none))                                              ///
    scheme(s1color)                                                                   ///
    name("demo_group_match_rates`x'",replace)
* End of the loop over polling-station types
}
* Combine the two stored panels into one figure and save it as a PNG in the
* working directory, overwriting any existing file of that name.
local panels demo_group_match_rates0 demo_group_match_rates1
graph combine `panels',                                              ///
    scheme(s1color)                                                  ///
    subtitle("Match Rates by Demographic Group", size(medlarge))
graph export "Voter Rolls Graph.png", width(4000) replace
R
Coming soon.
Other details
NA