Match rates

by demographic group

Previous Next

The chart

Data

A sample dataset can be downloaded here.

The code

Stata

* Folder that holds the sample dataset. The data file and the exported graph
* are both referenced by relative path, so they resolve against this folder.
* Edit the path to point at the matching folder on your own machine.
local project_dir "C:\Users\jeffe\Dropbox (IDinsight)\Technical bootcamp online\Data Visualization\Graph examples"
cd "`project_dir'"

* Optional: uncomment the next line to change the font used in graphs
// graph set window fontface "Times New Roman"

*****************************
** HH-level group averages **
*****************************

* Build one horizontal bar chart of match rates per polling-station type.
* The loop value 0 produces the panel titled Rural and 1 the panel titled Urban.
* Every rate is kept in a local macro named match_<group>_<level>; the macro
* holds the text of the expression 100*matched/total, which Stata evaluates
* when the macro is used in the replace statements of the graph section.
foreach x in 0 1 {

	* ==============================================================
	*  Part 1: household-level rates (caste, religion, wealth)
	* ==============================================================

	* Load the data and apply the sample restrictions in a single pass:
	*   matched == 2                         unmatched voters
	*   psid == 66                           polling station 66 is excluded
	*   Cindividual_type == 5 & matched == 1 unmatched dead individuals
	use "Voter Rolls Data for Bar Graph", clear
	drop if matched == 2 | psid == 66 | (Cindividual_type == 5 & matched == 1)
	keep if ps_type == `x'

	* A household counts as matched when at least one member has matched == 3
	egen total_num_matched = total(matched == 3), by(Ccensus_hhid)
	gen hh_matched = (total_num_matched != 0) if !missing(total_num_matched)

	* collapse discards value-label attachments, so remember each label name
	* beforehand and re-attach it afterwards
	unab collapse_vars : Ccaste Creligion Chh_size Cedu_hh_head_spouse Cassets*
	foreach v of local collapse_vars {
		local `v'_lab : value label `v'
	}

	* One row per household
	collapse (mean) `collapse_vars' hh_matched, by(ps_type Ccensus_hhid)

	foreach v of local collapse_vars {
		label values `v' ``v'_lab'
	}

	numlabel _all, add

	* Caste: General = 4, OBC = 3, SC = 1, ST = 2
	local caste_names gen obc sc st
	local caste_codes 4 3 1 2
	forvalues i = 1/4 {
		local nm : word `i' of `caste_names'
		local code : word `i' of `caste_codes'
		count if Ccaste == `code' & hh_matched == 1
		local num `r(N)'
		count if Ccaste == `code'
		local match_caste_`nm' 100*`num'/`r(N)'
	}

	* Religion: Hindu = 1, Muslim = 2
	local religion_names hindu muslim
	local religion_codes 1 2
	forvalues i = 1/2 {
		local nm : word `i' of `religion_names'
		local code : word `i' of `religion_codes'
		count if Creligion == `code' & hh_matched == 1
		local num `r(N)'
		count if Creligion == `code'
		local match_religion_`nm' 100*`num'/`r(N)'
	}

	* Wealth quartile, built from a points-based score (the *_ppi variables)

		* Points for household size; sizes from 8 up to the observed maximum score 0
		su Chh_size
		recode Chh_size (1=41) (2=34) (3=26) (4=19) (5=11) (6=7) (7=4) (8/`r(max)'=0), gen(size_ppi)

		* Points for the education level of the female head or spouse
		su Cedu_hh_head_spouse
		recode Cedu_hh_head_spouse (-999=.d) (-888=.r) (3 4 =5) (2 =3) (1 =0) (.=5), gen(female_ppi)

		* Points for each owned asset: source variable, points when the
		* variable equals 1, and stem of the generated score variable
		local asset_vars Cassets_refrigerator Cassets_stove Cassets_pressure Cassets_television Cassets_fan Cassets_dressing Cassets_table Cassets_moto
		local asset_points 11 2 4 5 3 4 6 19
		local asset_stems refrigerator stove cook tv fan dressing_almirah furniture vehicle
		forvalues i = 1/8 {
			local src : word `i' of `asset_vars'
			local pts : word `i' of `asset_points'
			local stem : word `i' of `asset_stems'
			recode `src' (-999=.d) (-888=.r) (1=`pts'), gen(`stem'_ppi)
		}

		* Total score, set to missing whenever any component is missing
		egen ppi_score_raw = rowtotal(*_ppi), missing
		egen ppi_score_miss = rowmiss(*_ppi)
		replace ppi_score_raw = .m if ppi_score_miss > 0

		* Quartiles of the total score
		xtile ppi_quartile = ppi_score_raw, nq(4)
		label define ppi_quartile 4 "Top 25%" 3 "50-75%" 2 "25-50%" 1 "Bottom 25%"
		label values ppi_quartile ppi_quartile
		label variable ppi_quartile "Wealth quartile"

		* Match rate within each quartile, from the top quartile down
		local wealth_names top25 50to75 25to50 bottom25
		local wealth_codes 4 3 2 1
		forvalues i = 1/4 {
			local nm : word `i' of `wealth_names'
			local code : word `i' of `wealth_codes'
			count if ppi_quartile == `code' & hh_matched == 1
			local num `r(N)'
			count if ppi_quartile == `code'
			local match_wealth_`nm' 100*`num'/`r(N)'
		}

	* ==============================================================
	*  Part 2: individual-level rates (age, gender)
	* ==============================================================

	* Reload the data and apply the same sample restrictions as in Part 1
	use "Voter Rolls Data for Bar Graph", clear
	drop if matched == 2 | psid == 66 | (Cindividual_type == 5 & matched == 1)
	keep if ps_type == `x'

	* An individual counts as matched when matched == 3
	gen indiv_matched = (matched == 3)

	* Age bands: 30 and under (labelled 18 to 30 on the graph), 31 to 50, over 50.
	* Missing ages are excluded explicitly because Stata sorts missing above
	* every number, so Cage > 50 alone would include them.
	local cond_below30 Cage <= 30 & !missing(Cage)
	local cond_30to50 Cage > 30 & Cage <= 50 & !missing(Cage)
	local cond_above50 Cage > 50 & !missing(Cage)
	foreach band in below30 30to50 above50 {
		count if `cond_`band'' & indiv_matched == 1
		local num `r(N)'
		count if `cond_`band''
		local match_age_`band' 100*`num'/`r(N)'
	}

	* Gender: Male = 0, Female = 1
	local gender_names male female
	local gender_codes 0 1
	forvalues i = 1/2 {
		local nm : word `i' of `gender_names'
		local code : word `i' of `gender_codes'
		count if Cgender == `code' & indiv_matched == 1
		local num `r(N)'
		count if Cgender == `code'
		local match_gender_`nm' 100*`num'/`r(N)'
	}

	* ==============================================================
	*  Part 3: graph
	* ==============================================================

	* The first rows of the data in memory are reused as plotting rows:
	* bar is the vertical position and match is the bar length.
	gen bar = _n if _n < 26
	gen match = .
	format match %4.0f

	* Vertical position of every bar, bottom to top. Positions not listed
	* stay empty and carry the sub-headers, headers and spacers:
	*    4 Age sub-header         5 spacer
	*    8 Gender sub-header      9 Individual-level header   10 spacer
	*   15 Wealth sub-header     16 spacer
	*   19 Religion sub-header   20 spacer
	*   25 Caste sub-header      26 Household-level header
	local bar_rows 1 2 3 6 7 11 12 13 14 17 18 21 22 23 24
	local bar_rates age_above50 age_30to50 age_below30 gender_female gender_male wealth_bottom25 wealth_25to50 wealth_50to75 wealth_top25 religion_muslim religion_hindu caste_st caste_sc caste_obc caste_gen
	forvalues i = 1/15 {
		local row : word `i' of `bar_rows'
		local rate : word `i' of `bar_rates'
		replace match = `match_`rate'' if _n == `row'
	}

	* Panel colour and title depend on the polling-station type
	if `x' == 0 {
		local bar_color navy
		local graphtitle "Rural"
	}
	else {
		local bar_color navy*0.5
		local graphtitle "Urban"
	}

	* Horizontal bars, plus an invisible scatter that prints each rate
	* next to the end of its bar
	twoway ///
		(bar match bar, barwidth(0.50) horizontal lcolor(black) fcolor(`bar_color')) ///
		(scatter bar match, mlabel(match) msymbol(none) mlabcolor(black) mlabsize(*0.8)) ///
		, ///
		ylabel( ///
			1 "Above 50" 2 "30 to 50" 3 "18 to 30" 4 "{it:Age (years)}" ///
			6 "Female" 7 "Male" 8 "{it:Gender}" 9 "{bf:Individual-level}" ///
			11 "Bottom 25%" 12 "25-50%" 13 "50-75%" 14 "Top 25%" 15 "{it:Wealth quartile}" ///
			17 "Muslim" 18 "Hindu" 19 "{it:Religious group}" ///
			21 "ST" 22 "SC" 23 "OBC" 24 "General" 25 "{it:Caste group}" 26 "{bf:Household-level}" ///
			, nogrid labgap(*0.5) labsize(small) angle(horizontal)) ///
		ytitle("") ///
		title("{bf:`graphtitle'}", size(large)) ///
		yscale(range(0 26)) ///
		xlabel(0(20)100, glwidth(thin) glpattern(solid) glcolor(gs14) nogmax) ///
		xtitle("{bf: Match Rates (%)}", size(medsmall)) ///
		legend(off) ///
		scheme(s1color) ///
		plotregion(margin(zero) style(none)) ///
		name("demo_group_match_rates`x'",replace)

}
	* Put the two panels built in the loop (graph names ending in 0 and 1)
	* into a single figure under a shared heading
	graph combine demo_group_match_rates0 demo_group_match_rates1 ///
		, scheme(s1color) ///
		subtitle("Match Rates by Demographic Group", size(medlarge))

	* Save the combined figure as a PNG 4000 pixels wide, overwriting any
	* existing file of the same name
	graph export "Voter Rolls Graph.png", width(4000) replace

R

Coming soon.

Other details

NA