* Session setup: clear the Results window and the data in memory, then fix the graph look.
cls
clear
set scheme s1color
graph set window fontface "Arial Narrow"

* Directory
* NOTE(review): absolute, user-specific path; every relative path below ("dta/...", "out/...") resolves against it.
cd "/Users/tristany/Desktop/Replication_AJRR/Data/"

* Load WBES data
use "dta/New_Comprehensive_March_1_2024.dta", clear

* Survey year: the last four characters of country, converted to a numeric variable (year)
gen yr=substr(country,-4,.)
destring yr, gen(year)

* Country name: remove every digit from country
* (ereplace and sieve() are not built-in; presumably the SSC packages ereplace/egenmore are required -- confirm)
ereplace country = sieve(country), omit(0123456789)

* Recode country spellings one by one. country is a merge key for dta/gdpxc2017.dta below,
* so the targets are presumably the spellings used in that file -- confirm.
replace country="Antigua and Barbuda" if country=="Antiguaandbarbuda"
replace country="Bahamas, The" if country=="Bahamas"
replace country="Burkina Faso" if country=="BurkinaFaso"
replace country="Cabo Verde" if country=="CapeVerde"
* NOTE(review): both "Congo" and "DRC" are mapped to "Congo, Dem. Rep."; confirm that "Congo"
* is not the Republic of Congo in the raw data.
replace country="Congo, Dem. Rep." if country=="Congo"
replace country="Congo, Dem. Rep." if country=="DRC"
replace country="Costa Rica" if country=="Costarica"
replace country="Dominican Republic" if country=="DominicanRepublic"
replace country="Egypt, Arab Rep." if country=="Egypt"
replace country="Yemen, Rep." if country=="Yemen"
replace country="Turkiye" if country=="Türkiye"
replace country="Trinidad and Tobago" if country=="TrinidadandTobago"
replace country="South Africa" if country=="SouthAfrica"
replace country="Sri Lanka" if country=="SriLanka"
replace country="Russian Federation" if country=="Russia"
replace country="Papua New Guinea" if country=="PapuaNewGuinea"
replace country="Micronesia, Fed. Sts." if country=="Micronesia"
replace country="Cote d'Ivoire" if country=="Côte d'Ivoire"
replace country="Gambia, The" if country=="Gambia"
replace country="Guinea-Bissau" if country=="GuineaBissau"
replace country="Lao PDR" if country=="LaoPDR"
replace country="El Salvador" if country=="ElSalvador"
replace country="St. Kitts and Nevis" if country=="StKittsandNevis"
replace country="St. Lucia" if country=="StLucia"
replace country="St. Vincent and the Grenadines" if country=="StVincentandGrenadines"
replace country="Venezuela, RB" if country=="Venezuela"
replace country="South Sudan" if country=="Southsudan"

* Drop Luxembourg
drop if country=="Luxembourg"

* Merge with GDPxc (2017 international USD at PPP)
* Many firm records share one country-year GDP row, so this is a many-to-one merge.
* The modern syntax replaces the deprecated "merge varlist using" form: it needs no
* prior sort and stops with an error if country-year is not unique in the GDP file,
* instead of silently pairing rows. Only matched observations are kept.
merge m:1 country year using "dta/gdpxc2017.dta", keep(match) nogenerate

* Select sample: keep countries with at least one observation whose GDP per capita
* is 25,000 or more, then drop observations without a GDP value.
bysort country: egen byte ever_rich = max(gdpxc2017>=25000 & gdpxc2017!=.)
keep if ever_rich==1 & gdpxc2017!=.
drop ever_rich

rename gdpxc2017 gdpxc
gen log_gdpxc = log(gdpxc)

* Age: survey year minus the year operations began (b5).
* Start years at or before 1920 (this also covers negative codes) or after the
* survey year are treated as missing.
gen begin_year = b5
replace begin_year = . if begin_year<=1920 | begin_year>year
gen age = year - begin_year


* Construct r&d, process-innovation and product-innovation indicators.
* Each copies a survey item, sets negative codes to missing and recodes 2 to 0.
local survey_q  h8  h5          h1
local indicator r_d process_inv product_inv

forvalues k = 1/3 {
    local q : word `k' of `survey_q'
    local d : word `k' of `indicator'
    gen `d' = `q'
    replace `d' = . if `q'<0
    replace `d' = 0 if `q'==2
}


* Only formal firms: drop observations with b6a equal to 2 or negative
drop if b6a==2 | b6a<0


* Construct firm size: l1, with negative codes and values above 999990 set to missing
replace l1 = . if l1<0 | l1>999990
gen log_l1 = log(l1)

* Inputs for firm growth: l2 (non-positive values also set to missing) ...
replace l2 = . if l2<=0 | l2>999990
gen log_l2 = log(l2)

* ... and l0, a cleaned copy of b6
gen l0 = b6
replace l0 = . if b6<0 | b6>999990
gen log_l0 = log(l0)



* Three growth measures, built and trimmed in this order because the annualized
* measure is computed from the already-trimmed Delta_l:
*   Delta_l            = 100 * (log l1 - log l0)
*   Delta_l_annualized = Delta_l / (1 + age)
*   Delta_l_xperiod    = 100 * (log l1 - log l2) / 3
* Each is set to missing below 0.5 x its 1st percentile or above 2 x its 99th percentile.
local g_Delta_l            100*(log(l1)-log(l0))
local g_Delta_l_annualized Delta_l/(1+age)
local g_Delta_l_xperiod    100*(log(l1)-log(l2))/3

foreach v in Delta_l Delta_l_annualized Delta_l_xperiod {
    gen `v' = `g_`v''
    sum `v', d
    egen Delta_l_p99 = pctile(`v'), p(99)
    egen Delta_l_p01 = pctile(`v'), p(01)
    replace `v' = . if `v' < 0.5*Delta_l_p01
    replace `v' = . if `v' > 2*Delta_l_p99
    drop Delta_l_p*
}

* Wage bill: n2a with negative codes set to missing
gen wagebill=n2a
replace wagebill=. if n2a<0
gen log_wagebill=log(wagebill)


* Wage rate: wage bill per worker.
* Built from the cleaned wagebill rather than raw n2a, so negative n2a codes give a
* missing wage rate instead of a negative one. log_wagerate is unaffected, because
* the log of a negative number is already missing.
gen wagerate=wagebill/l1
gen log_wagerate=log(wagerate)


* Foreign technology indicator: 1 if e6==1, 0 if e6==2, missing for any other value
gen ftech = (e6==1) if inlist(e6,1,2)
 
 
* Age group: 0 = age 0, then 10-year bands, 5 = age 40 and above
gen    agegroup=0  if age==0  
replace agegroup=1 if age>=1  & age<10
replace agegroup=2 if age>=10 & age<20
replace agegroup=3 if age>=20 & age<30
replace agegroup=4 if age>=30 & age<40
replace agegroup=5 if age>=40  & age!=.
tab agegroup, gen(agegroupid)


* Coarser grouping: 0 = age 0, 1 = ages 1-10, 2 = above 10
gen agegroup2 =0  if age==0
replace agegroup2=1  if age>=1  & age<=10 
replace agegroup2=2  if age>10  & age!=.
tab agegroup2, gen(agegroup2id)


* Four-way grouping: 0 = age 0, 1 = ages 1-10, 2 = ages 11-25, 3 = above 25.
* The second line previously tested age>=0, which overwrote the age==0 category
* created on the first line; it now starts at 1, matching agegroup2.
* NOTE(review): this adds a level, so agegroup3id1 is now the age-0 dummy.
gen agegroup3 =0  if age==0
replace agegroup3=1  if age>=1  & age<=10 
replace agegroup3=2  if age>10  & age<=25
replace agegroup3=3  if age>25  & age!=.
tab agegroup3, gen(agegroup3id)


* Coarse sector code: isic_v3_1 values 10-19 -> 1, 20-29 -> 2, ..., 60-69 -> 6,
* and 70-99 -> 7. Values outside these ranges keep their original code.
gen isic_v2 = isic_v3_1
forvalues g = 1/6 {
    replace isic_v2 = `g' if isic_v3_1>=`g'0 & isic_v3_1<=`g'9
}
replace isic_v2 = 7 if isic_v3_1>=70 & isic_v3_1<=99

* Legal status: 1 if b1 is 1 or 2, 0 if b1 is 3 or higher, missing otherwise
gen legalstatus=.
replace legalstatus=1 if (b1==1 | b1==2)
replace legalstatus=0 if (b1>=3 & b1!=.)

* Foreign indicator: 1 if b2b is positive, 0 if zero, missing if negative or missing.
* Stata treats missing as larger than any number, so "b2b>0" alone would code
* firms with a missing b2b as foreign; the "b2b<." guard excludes them.
gen foreign=.
replace foreign=1 if b2b>0 & b2b<.
replace foreign=0 if b2b==0

* Exporter: 1 if d3b or d3c is positive, 0 if both are zero.
* Same guard as above so that a missing d3b or d3c does not count as positive.
gen exporter=.
replace exporter=0 if d3b==0 & d3c==0
replace exporter=1 if (d3b>0 & d3b<.) | (d3c>0 & d3c<.)

* Importer: d13 with negative codes set to missing and 2 recoded to 0
gen importer= d13 
replace importer=. if d13<0
replace importer=0 if d13==2

****************************************
* OBSERVATIONS IN LOCAL LABOR MARKET
* obs = sum of sampling weights within each a2x x isic_v3_1 x country x year cell
bysort a2x isic_v3_1 country year: egen obs = total(wt_rs)


****************************************
* Merge with production data
* Old-style match merge on idstd, country and year; only matched observations are kept.
* NOTE(review): this deprecated syntax does not check key uniqueness. If idstd-country-year
* is unique in both files, "merge 1:1 idstd country year using ..." would enforce that -- confirm.
sort idstd country year
merge idstd country year using "dta/Firm Level TFP Estimates and Factor Ratios_March_1_2024.dta"
keep if _merge==3
drop _merge
* Restrict the sample to manufacturing firms
keep if sector_MS=="Manufacturing"


***************************************
****************************************
****************************************

* Estimation of markdown by country
* For each CountryCode, regress log_d2 on log_l1 plus cubic terms in log_n2e and
* log_n7a and their interaction, weighted by wt_rs. Then store
*   b_hat = coefficient on log_l1
*   md    = b_hat * exp(log_d2 - log_n2a)
gen md=.
gen b_hat=.

local prodfn log_l1 c.log_n2e##c.log_n2e##c.log_n2e c.log_n7a##c.log_n7a##c.log_n7a c.log_n7a#c.log_n2e

levelsof CountryCode, local(countryid)
foreach iso of local countryid {
    qui reghdfe log_d2 `prodfn' [aw=wt_rs] if CountryCode=="`iso'", noabsorb

    replace md    = _b[log_l1]*exp(log_d2-log_n2a) if CountryCode=="`iso'"
    replace b_hat = _b[log_l1]                     if CountryCode=="`iso'"
}


* Estimation of markdown with controls
* Same regression as the by-country estimation, but absorbing age, year, isic_v3_1,
* a2x, foreign and legal. SAU is excluded from the list of countries.
gen md_control=.
gen b_hat_control=.

local prodfn log_l1 c.log_n2e##c.log_n2e##c.log_n2e c.log_n7a##c.log_n7a##c.log_n7a c.log_n7a#c.log_n2e

levelsof CountryCode, local(countryid)
local countryid_exclude SAU
local countryid_final: list countryid-countryid_exclude

foreach iso of local countryid_final {
    tab CountryCode if CountryCode=="`iso'"

    qui reghdfe log_d2 `prodfn' [aw=wt_rs] if CountryCode=="`iso'", absorb(age year isic_v3_1 a2x foreign legal)

    replace md_control    = _b[log_l1]*exp(log_d2-log_n2a) if CountryCode=="`iso'"
    replace b_hat_control = _b[log_l1]                     if CountryCode=="`iso'"
}




****************************************
**** SUMMARY STATS
* Table A.1: observations by country and year
tab country year

* Table A.2: weighted cell means (a2x x isic_v3_1 x country x year), aggregated to
* country-year means using each cell's share of the summed obs, then summarized.
preserve
local a2vars l1 Delta_l Delta_l_a age r_d process_ product_ log_d2 log_n2e log_n7a foreign legal

collapse (mean) `a2vars' obs [w=wt_rs], by(a2x isic_v3_1 country year)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

collapse (mean) `a2vars' [w=shares], by(country year)
tabstat `a2vars', stat(N mean median sd p25 p75) col(stat)
restore


* Table A.3: weighted distribution of b_hat
tabstat b_hat [w=wt_rs], stat(mean median sd p25 p75)

 

****************************************
*** Average markdown
* Country-year mean markdowns (cell means aggregated with obs shares), plotted
* against GDP. The caption reports the slope and t-statistic from a regression of
* the log markdown on log GDP with robust standard errors.
preserve
collapse (mean) md md_control gdp obs [w=wt_rs], by(a2x isic_v3_1 country year)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

collapse (mean) md md_control gdp [w=shares], by(country year)

gen log_gdp = log(gdp)
foreach m in md md_control {
    gen log_`m' = log(`m')
}

foreach m in md md_control {
    // tag is empty for md and "_control" for md_control
    local tag = subinstr("`m'", "md", "", 1)

    reghdfe log_`m' log_gdp, noabsorb vce(r)
    matrix b = e(b)
    matrix x = e(V)
    local slope_w    : di %4.3f b[1,1]
    local se_slope_w : di %4.3f (x[1,1])^0.5
    local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)

    binscatter `m' gdp, line(lfit) lc(black) ///
        ytitle("Average markdown",size(*1.5)) xtitle("GDP (2017 USD)",size(*1.5)) ///
        ylabel(0(1.5)6) xlabel(20000(20000)80000) ///
        caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) ///
        savedata("out/mean_markdown_gdp`tag'") replace
    gr export "out/mean_markdown_gdp`tag'.eps", replace
}

* Markdown with controls against markdown without controls, with a 45-degree line
tw (scatter md_control md if md_control>0, mc(edkblue)) (lfit md md, lc(maroon)), ///
    ytitle("Mean markdown (Controls)",size(*1.5)) xtitle("Mean markdown (No controls)",size(*1.5)) ///
    xlabel(0(2)12) ylabel(0(2)12) legend(order(2 "45-degree line"))
gr export "out/mean_markdown_gdp_control_vs_nocontrol.eps", replace

restore
 


****************************************
*** Median markdown
* Same layout as the average-markdown section, using medians at both collapse steps.
preserve
collapse (median) md md_c (mean) gdp obs [w=wt_rs], by(a2x isic_v3_1 country year)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

collapse (median) md md_c (mean) gdp [w=shares], by(country year)

gen log_gdp = log(gdp)
foreach m in md md_c {
    gen log_`m' = log(`m')
}

foreach m in md md_c {
    // output-file suffix: none for md, "_control" for md_c
    local tag = cond("`m'"=="md", "", "_control")

    reghdfe log_`m' log_gdp, noabsorb vce(r)
    matrix b = e(b)
    matrix x = e(V)
    local slope_w    : di %4.3f b[1,1]
    local se_slope_w : di %4.3f (x[1,1])^0.5
    local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)

    binscatter `m' gdp, line(lfit) lc(black) ///
        ytitle("Median markdown",size(*1.5)) xtitle("GDP (2017 USD)",size(*1.5)) ///
        ylabel(0.5(0.5)3.5) xlabel(20000(20000)80000) ///
        caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) ///
        savedata("out/median_markdown_gdp`tag'") replace
    gr export "out/median_markdown_gdp`tag'.eps", replace
}

* Markdown with controls against markdown without controls, with a 45-degree line
tw (scatter md_c md if md_c>0, mc(edkblue)) (lfit md md, lc(maroon)), ///
    ytitle("Median markdown (Controls)",size(*1.5)) xtitle("Median markdown (No controls)",size(*1.5)) ///
    xlabel(0(1)4) ylabel(0(1)4) legend(order(2 "45-degree line"))
gr export "out/median_markdown_gdp_control_vs_nocontrol.eps", replace

restore


 
* Aggregate (sales weighted)
* Cell medians weighted by sales x sampling weight, aggregated to country-year with
* obs shares and plotted against GDP.
* The binscatter data are saved under "aggregate_..." names. They were previously
* written to "out/median_markdown_gdp[_control]", which overwrote the files saved
* by the median-markdown section.
preserve

gen sales=d2
replace sales=. if d2<0
gen sales_wt_rs = sales*wt_rs

collapse (median) md  md_c (mean) gdp obs [w=sales_wt_rs], by(a2x isic_v3_1  country year)

qui:sum obs,d
gen totobs=r(sum)
gen shares=obs/totobs

collapse (median) md md_c (mean) gdp  [w=shares], by(country year)

gen log_gdp=log(gdp)
gen log_md=log(md)
gen log_md_c=log(md_c)


* Without controls
reghdfe log_md log_gdp,  noabsorb vce(r)
matrix b = e(b)
matrix x = e(V)
local slope_w    : di %4.3f b[1,1]
local se_slope_w  : di %4.3f (x[1,1])^0.5
local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)


binscatter md gdp,  line(lfit) lc(black)  ytitle("Aggregate markdown",size(*1.5)) xtitle("GDP (2017 USD)",size(*1.5)) ylabel(0(1.5)6)   xlabel(20000(20000)80000) caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) savedata("out/aggregate_markdown_gdp") replace
gr export "out/aggregate_markdown_gdp.eps", replace


* With controls
reghdfe log_md_c log_gdp,  noabsorb vce(r)
matrix b = e(b)
matrix x = e(V)
local slope_w    : di %4.3f b[1,1]
local se_slope_w  : di %4.3f (x[1,1])^0.5
local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)


binscatter md_c gdp,  line(lfit) lc(black)  ytitle("Aggregate markdown",size(*1.5)) xtitle("GDP (2017 USD)",size(*1.5)) ylabel(0(1.5)7.5)   xlabel(20000(20000)80000) caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) savedata("out/aggregate_markdown_gdp_control") replace
gr export "out/aggregate_markdown_gdp_control.eps", replace

restore




****************************************
* Average R&D across countries
* Country-year means of the four indicators (cell means aggregated with obs shares).
* Each is regressed on log GDP with robust standard errors and plotted against GDP.
preserve
collapse (mean) r_d process_ product_ ftech gdp obs [w=wt_rs], by(a2x isic_v3_1 country year)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

collapse (mean) r_d process_ product_ ftech gdp [w=shares], by(country year)

gen log_gdp = log(gdp)

* Per-figure settings: outcome, y-axis title, y-axis labels, output file stem
local y1  r_d
local yt1 "Firms performing r&d"
local yl1 0.05(0.10)0.55
local f1  avg_rd_gdp

local y2  process_
local yt2 "Firms performing process innovation"
local yl2 0.10(0.10)0.60
local f2  avg_process_gdp

local y3  product_
local yt3 "Firms performing product innovation"
local yl3 0(0.20)1
local f3  avg_product_gdp

local y4  ftech
local yt4 "Firms using foreign technology"
local yl4 0(0.10)0.4
local f4  avg_ftech_gdp

forvalues k = 1/4 {
    reghdfe `y`k'' log_gdp, noabsorb vce(r)
    matrix b = e(b)
    matrix x = e(V)
    local slope_w    : di %4.3f b[1,1]
    local se_slope_w : di %4.3f (x[1,1])^0.5
    local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)

    binscatter `y`k'' gdp, line(lfit) lc(black) ///
        ytitle("`yt`k''", size(*1.5)) xtitle("GDP (2017 USD)", size(*1.5)) ///
        ylabel(`yl`k'') xlabel(20000(20000)80000) ///
        caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) ///
        savedata("out/`f`k''") replace
    gr export "out/`f`k''.eps", replace
}
restore


****************************************
* Average firm growth across countries
* Country-year mean of Delta_l (cell means aggregated with obs shares) against GDP
preserve
collapse (mean) Delta_l gdp obs age [w=wt_rs], by(a2x isic_v3_1 country year)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

collapse (mean) Delta_l gdp age [w=shares], by(country year)

gen log_gdp = log(gdp)

reghdfe Delta_l log_gdp, noabsorb vce(r)
matrix b = e(b)
matrix x = e(V)
local slope_w    : di %4.3f b[1,1]
local se_slope_w : di %4.3f (x[1,1])^0.5
local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)

binscatter Delta_l gdp, line(lfit) lc(black) ///
    ytitle("Average firm growth, %",  size(*1.5)) xtitle("GDP (2017 USD)",  size(*1.5)) ///
    xlabel(20000(20000)80000) ylabel(50(25)175) ///
    caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) ///
    savedata("out/avg_firmsizegrowth_gdp") replace
gr export "out/avg_firmsizegrowth_gdp.eps", replace
restore


****************************************
* Average annualized firm growth across countries
* Means are taken within age x country x year, and age is absorbed in both the
* regression and the binscatter. After the collapse the only growth variable left
* is the annualized one, which Delta_l_a abbreviates.
preserve
collapse (mean) Delta_l_a gdp obs [w=wt_rs], by(age a2x isic_v3_1 country year)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

collapse (mean) Delta_l_a gdp [w=shares], by(age country year)

gen log_gdp = log(gdp)

reghdfe Delta_l_a log_gdp, absorb(age) vce(r)
matrix b = e(b)
matrix x = e(V)
local slope_w    : di %4.3f b[1,1]
local se_slope_w : di %4.3f (x[1,1])^0.5
local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)

binscatter Delta_l_a gdp, absorb(age) line(lfit) lc(black) ///
    ytitle("Average firm growth, %",  size(*1.5)) xtitle("GDP (2017 USD)",  size(*1.5)) ///
    xlabel(20000(20000)80000) ylabel(4(2)10) ///
    caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) ///
    savedata("out/avg_firmsizegrowth_gdp_annualized") replace

gr export "out/avg_firmsizegrowth_gdp_annualized.eps", replace
restore


* By cohort
* Mean growth by agegroup within country-year, reshaped wide so that each age group
* has its own column. The figure uses agegroup 5 (age 40 and above).
* NOTE(review): the output files are named "50years" although agegroup 5 starts at age 40.
preserve
keep if age!=.
collapse (mean) Delta_l l1 gdp obs [w=wt_rs], by(a2x isic_v3_1 country year agegroup)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

rename l1 l1_
collapse (mean) Delta_l l1_ gdp [w=shares], by(agegroup country year)

// log_gdpxc is not in the reshape list, so reshape requires it to be constant within country-year
gen log_gdpxc = log(gdpxc)
reshape wide Delta_l l1_ gdpxc, i(country year) j(agegroup)

reg Delta_l5 log_gdp, r
matrix b = e(b)
matrix x = e(V)
local slope_w    : di %4.3f b[1,1]
local se_slope_w : di %4.3f (x[1,1])^0.5
local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)

binscatter Delta_l5 gdpxc5, line(lfit) lc(black) ///
    ytitle("Average firm growth, %", size(*1.5)) xtitle("GDP (2017 USD)", size(*1.5)) ///
    ylabel(50(50)300) xlabel(20000(20000)80000) ///
    caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) ///
    savedata("out/avg_firmsizegrowth_50years_gdp") replace
gr export "out/avg_firmsizegrowth_50years_gdp.eps", replace
restore


 
****************************************
* Selection across countries
* Country-year mean firm age (cell means aggregated with obs shares) against GDP.
* NOTE(review): the binscatter data are saved as "avg_firmexitrate_gdp" while the
* figure is exported as "avg_firmage_gdp"; both names are kept as they are.
preserve
collapse (mean) age gdp obs [w=wt_rs], by(a2x isic_v3_1 country year)

qui sum obs
gen totobs = r(sum)
gen shares = obs/totobs

collapse (mean) age gdp [w=shares], by(country year)

gen log_gdp = log(gdp)

reghdfe age log_gdp, noabsorb vce(r)
matrix b = e(b)
matrix x = e(V)
local slope_w    : di %4.3f b[1,1]
local se_slope_w : di %4.3f (x[1,1])^0.5
local t_slope_w  : di %4.3f b[1,1]/((x[1,1])^0.5)

binscatter age gdp, line(lfit) lc(black) ///
    ytitle("Average firm age",  size(*1.5)) xtitle("GDP (2017 USD)",  size(*1.5)) ///
    xlabel(20000(20000)80000) ylabel(10(5)40) ///
    caption("Slope (t-stat): `slope_w' (`t_slope_w')", color(maroon) position(150 0) size(*1.5)) ///
    savedata("out/avg_firmexitrate_gdp") replace
gr export "out/avg_firmage_gdp.eps", replace
restore

 

****************************************
* Moments for Calibration
* Netherlands only. Builds within-cell (a2x x isic_v3_1) moments, averages them to
* country-year with each cell's share of the summed weights, and exports them.
* Relative to the earlier version, the never-used variables (corr_logw_logl1,
* below20_weights, plus100_weights) and their unquieted summarize calls are gone,
* and the repeated per-cell code is factored into loops. Exported values are unchanged.
preserve
keep if country=="Netherlands"

* Log wage per worker and its OLS slope on log employment (cell fixed effects absorbed)
gen log_w=log_n2a-log_l1
qui: reghdfe log_w log_l1 [aw=wt_rs], absorb(a2x isic_v3_1)
gen Elogw_bols=_b[log_l1]

* OLS slope of the r&d indicator on log employment
qui: reghdfe r_d log_l1 [aw=wt_rs], absorb(a2x isic_v3_1)
gen Erd_bols=_b[log_l1]

* Firm-level indicators (share*) and their weighted cell means (Eshare*)
local sharevars share20below share100plus share_y30below share_y60plus share_rd_20below share_rd_20_100 share_rd_100plus
foreach v of local sharevars {
    gen `v' = .
    gen E`v' = .
}

* obs is rebuilt as the sum of weights per cell within the Netherlands sample
drop obs
gen obs=.

levelsof a2x, local(locid)
foreach l of local locid {

    levelsof isic_v3_1 if a2x== "`l'", local(secid)
    foreach s of local secid {

        local cell a2x=="`l'" & isic_v3_1==`s'

        // size indicators: at most 20 workers / more than 100 workers
        replace share20below   = (l1<=20)  if l1!=.  & `cell'
        replace share100plus   = (l1>100)  if l1!=.  & `cell'

        // age indicators: at most 30 years / more than 60 years
        replace share_y30below = (age<=30) if age!=. & `cell'
        replace share_y60plus  = (age>60)  if age!=. & `cell'

        // r&d indicator within each size class (defined only where r_d is 0 or 1)
        replace share_rd_20below = r_d if inlist(r_d,0,1) & l1!=. & l1<=20 & `cell'
        replace share_rd_20_100  = r_d if inlist(r_d,0,1) & l1!=. & l1>20 & l1<=100 & `cell'
        replace share_rd_100plus = r_d if inlist(r_d,0,1) & l1!=. & l1>100 & `cell'

        // weighted cell mean of each indicator
        foreach v of local sharevars {
            qui sum `v' [aw=wt_rs] if `cell'
            replace E`v' = r(mean) if `cell'
        }

        // total sampling weight in the cell
        qui sum wt_rs if `cell'
        replace obs = r(sum) if `cell'
    }
}

* Cell-level moments: means, sd of log size and log wage, median markdown
gen m_log_w=log_w
collapse (mean) l1 l2 Delta_l Delta_l_a  r_d age Elogw_bols Erd_bols Eshare100plus Eshare20below Eshare_y30below Eshare_y60plus Eshare_rd_20below Eshare_rd_20_100 Eshare_rd_100plus  obs gdpxc m_log_w b_hat (sd) log_l1 log_w  (median) md [w=wt_rs], by(country year a2x isic_v3_1)

qui sum obs
gen totobs=r(sum)
gen shares=obs/totobs

rename log_l1 sd_log_l1
rename log_w sd_log_w
rename md median_md
rename l1 E_l1
rename l2 E_l2
rename Delta_l    E_Delta_l
rename Delta_l_a  E_Delta_l_a 
rename r_d E_r_d
rename age E_age

* Country-year moments: share-weighted average of the cell moments
collapse (mean) gdpxc E_l1 E_l2 E_Delta_l E_Delta_l_a E_r_d E_age Elogw_bols Erd_bols Eshare100plus   Eshare20below Eshare_y30below Eshare_y60plus Eshare_rd_20below Eshare_rd_20_100 Eshare_rd_100plus     sd_log_l1 m_log_w sd_log_w median_md b_hat [w=shares], by(country year)

export excel country year gdpxc E_l1 E_l2 E_Delta_l E_Delta_l_a E_r_d E_age Elogw_bols Erd_bols Eshare100plus  Eshare20below Eshare_y30below Eshare_y60plus Eshare_rd_20below Eshare_rd_20_100 Eshare_rd_100plus     m_log_w sd_log_l1 sd_log_w median_md  b_hat using "out/targets_NET.xlsx", replace first(var)
restore


****************************************
* Moments for Counterfactuals
* Greece only. Builds within-cell (a2x x isic_v3_1) moments, averages them to
* country-year with each cell's share of the summed weights, and exports them.
* Relative to the earlier version, the never-used variables (below20_weights,
* plus100_weights) and their unquieted summarize calls are gone, and the repeated
* per-cell code is factored into loops. Exported values are unchanged.
preserve
keep if country=="Greece"

* Log wage per worker and its OLS slope on log employment (cell fixed effects absorbed)
* NOTE(review): this uses log(n2a), whereas the Netherlands calibration block uses
* log_n2a; confirm the two are the same quantity, since m_log_w depends on it.
gen log_w=log(n2a)-log_l1
qui: reghdfe log_w log_l1 [aw=wt_rs], absorb(a2x isic_v3_1)
gen Elogw_bols=_b[log_l1]

* OLS slope of the r&d indicator on log employment
qui: reghdfe r_d log_l1 [aw=wt_rs], absorb(a2x isic_v3_1)
gen Erd_bols=_b[log_l1]

* Firm-level indicators (share*) and their weighted cell means (Eshare*)
local sharevars share20below share100plus share_y30below share_y60plus share_rd_20below share_rd_20_100 share_rd_100plus
foreach v of local sharevars {
    gen `v' = .
    gen E`v' = .
}

* obs is rebuilt as the sum of weights per cell within the Greece sample
drop obs
gen obs=.

levelsof a2x, local(locid)
foreach l of local locid {

    levelsof isic_v3_1 if a2x== "`l'", local(secid)
    foreach s of local secid {

        local cell a2x=="`l'" & isic_v3_1==`s'

        // size indicators: at most 20 workers / more than 100 workers
        replace share20below   = (l1<=20)  if l1!=.  & `cell'
        replace share100plus   = (l1>100)  if l1!=.  & `cell'

        // age indicators: at most 30 years / more than 60 years
        replace share_y30below = (age<=30) if age!=. & `cell'
        replace share_y60plus  = (age>60)  if age!=. & `cell'

        // r&d indicator within each size class (defined only where r_d is 0 or 1)
        replace share_rd_20below = r_d if inlist(r_d,0,1) & l1!=. & l1<=20 & `cell'
        replace share_rd_20_100  = r_d if inlist(r_d,0,1) & l1!=. & l1>20 & l1<=100 & `cell'
        replace share_rd_100plus = r_d if inlist(r_d,0,1) & l1!=. & l1>100 & `cell'

        // weighted cell mean of each indicator
        foreach v of local sharevars {
            qui sum `v' [aw=wt_rs] if `cell'
            replace E`v' = r(mean) if `cell'
        }

        // total sampling weight in the cell
        qui sum wt_rs if `cell'
        replace obs = r(sum) if `cell'
    }
}

* Cell-level moments: means, sd of log size and log wage, median markdown
gen m_log_w=log_w
collapse (mean) l1 Delta_l Delta_l_a r_d age Elogw_bols Erd_bols Eshare100plus Eshare20below Eshare_y30below Eshare_y60plus Eshare_rd_20below Eshare_rd_20_100 Eshare_rd_100plus     obs gdpxc m_log_w b_hat (sd) log_l1 log_w  (median) md [w=wt_rs], by(country year a2x isic_v3_1)

qui sum obs
gen totobs=r(sum)
gen shares=obs/totobs

rename log_l1 sd_log_l1
rename log_w sd_log_w
rename md median_md
rename l1 E_l1
rename Delta_l E_Delta_l
rename Delta_l_a E_Delta_l_a
rename r_d E_r_d
rename age E_age

* Country-year moments: share-weighted average of the cell moments
collapse (mean) gdpxc E_l1 E_Delta_l E_Delta_l_a E_r_d E_age Elogw_bols Erd_bols Eshare100plus Eshare20below Eshare_y30below Eshare_y60plus Eshare_rd_20below Eshare_rd_20_100 Eshare_rd_100plus     m_log_w sd_log_l1  sd_log_w median_md b_hat [w=shares], by(country year)


export excel country year gdpxc E_l1 E_Delta_l E_Delta_l_a E_r_d E_age Elogw_bols Erd_bols Eshare100plus Eshare20below Eshare_y30below Eshare_y60plus Eshare_rd_20below Eshare_rd_20_100 Eshare_rd_100plus     m_log_w sd_log_l1  sd_log_w median_md b_hat using "out/targets_GRE.xlsx", replace first(var)

restore



******* STANDARD ERRORS FOR BASELINE
* Variance-covariance matrix of the Netherlands moments across 500 sets of
* resampled weights.
* NOTE: this section is destructive (no preserve around the keep); it is expected
* to be the last step of the do-file.
keep if country=="Netherlands"

*** SAMPLE weights
* exbsample is a user-written command; given the stub, it is expected to create
* wt_rs1-wt_rs500, which the loop below reads.
exbsample 500 [iw=wt_rs], stub(wt_rs)

*** LOOP OVER SAMPLE weights
* Accumulator file for the per-iteration moments. It must start out empty: it was
* previously saved from the full micro data in memory, so every iteration appended
* and re-saved all firm-level rows (with the 500 weight variables) alongside the
* collapsed moments. The covariance below was unaffected only because those rows
* have missing moments and are dropped casewise.
tempfile master
preserve
clear
save `master', emptyok
restore


forvalues x = 1/500 {
    preserve

    // use the x-th resampled weight as the sampling weight
    replace wt_rs=wt_rs`x'

    gen log_w=log_n2a-log_l1

    // obs = sum of the resampled weights within each a2x x isic_v3_1 cell
    drop obs
    gen obs=.

    qui {
        levelsof a2x, local(locid)
        foreach l of local locid {

            levelsof isic_v3_1 if a2x== "`l'", local(secid)
            foreach s of local secid {
                sum wt_rs if a2x== "`l'" & isic_v3_1== `s'
                replace obs = r(sum) if a2x== "`l'" & isic_v3_1== `s'
            }
        }
    }

    // cell-level moments, then the share-weighted country-year average
    collapse (mean) l1 Delta_l r_d age obs gdpxc b_hat (sd) log_l1 log_w  (median) md [w=wt_rs], by(country year a2x isic_v3_1)

    qui sum obs
    gen totobs=r(sum)
    gen shares=obs/totobs

    rename log_l1 sd_log_l1
    rename log_w sd_log_w
    rename l1 E_l1
    rename Delta_l E_Delta_l
    rename r_d E_r_d
    rename age E_age

    collapse (mean) gdpxc E_l1 E_Delta_l E_r_d E_age sd_log_l1 sd_log_w [w=shares], by(country year)

    gen iteration=`x'

    append using `master'
    save `master', replace
    restore
}
clear
use `master', clear
keep E_l1  E_r_d E_age sd_log_l1 sd_log_w E_Delta_l country year
* Rescale growth by 1/100 (Delta_l is built as 100 x a log difference)
replace E_Delta_l=E_Delta_l/100


* Create VARCOV
corr E_l1  E_r_d E_age sd_log_l1 sd_log_w E_Delta_l, cov
mat V = r(C)

* Set Excel file for output
putexcel set "out/targets_varcov_NET.xlsx", replace

* Write the matrix to Excel
putexcel A1=matrix(V)

