------------------------------------------------------------------------------------------------- name: log: C:\vhm812-data\2bl_reg_dx.txt log type: text opened on: 14 Jan 2016, 09:08:43 . set more off . . * open the DAISY dataset . use daisy2red.dta, clear . gen parity1=parity-1 . gen calv_mth=month(calv_dt) . gen aut_calv=(calv_mth>=2 & calv_mth<=7) if !missing(calv_mth) . label value aut_calv noyes . gen hs100_ctr=(herd_size-251)/100 . gen hs100_ctr_sq=hs100_ctr^2 . save daisy2red_01, replace file daisy2red_01.dta saved . . *final model . reg wpc hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst rp##vag_disch, vsquish Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 13.22 Model | 296062.694 9 32895.8549 Prob > F = 0.0000 Residual | 3892027.86 1,564 2488.50886 R-squared = 0.0707 -------------+---------------------------------- Adj R-squared = 0.0653 Total | 4188090.56 1,573 2662.48605 Root MSE = 49.885 ------------------------------------------------------------------------------ wpc | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | 19.85708 2.163397 9.18 0.000 15.61361 24.10054 hs100_ctr_sq | 11.13827 3.111145 3.58 0.000 5.035817 17.24073 parity1 | 1.13721 .8583103 1.32 0.185 -.5463501 2.82077 aut_calv | -8.263839 2.537751 -3.26 0.001 -13.24159 -3.286086 twin | 20.68314 9.845165 2.10 0.036 1.37203 39.99425 dyst | 11.70041 5.462576 2.14 0.032 .985666 22.41516 rp | yes | 5.98687 4.811976 1.24 0.214 -3.451734 15.42547 vag_disch | yes | 1.228196 7.161395 0.17 0.864 -12.81875 15.27514 rp#vag_disch | yes#yes | 22.85194 12.51605 1.83 0.068 -1.698056 47.40194 _cons | 64.33029 2.634114 24.42 0.000 59.16352 69.49705 ------------------------------------------------------------------------------ . *initial diagnostic . capture drop fit stdres /* adjust if not defined */ . predict fit, xb . predict stdres, rstandard . *homoskedasticity . scatter stdres fit . estat hettest Breusch-Pagan / Cook-Weisberg test for heteroskedasticity Ho: Constant variance Variables: fitted values of wpc chi2(1) = 20.58 Prob > chi2 = 0.0000 . estat imtest Cameron & Trivedi's decomposition of IM-test --------------------------------------------------- Source | chi2 df p ---------------------+----------------------------- Heteroskedasticity | 74.11 44 0.0030 Skewness | 143.84 9 0.0000 Kurtosis | 33.27 1 0.0000 ---------------------+----------------------------- Total | 251.22 54 0.0000 --------------------------------------------------- . *normality . qnorm stdres . hist stdres, normal (bin=31, start=-1.5436347, width=.18891791) . summ stdres, d Standardized residuals ------------------------------------------------------------- Percentiles Smallest 1% -1.295559 -1.543635 5% -1.085226 -1.491268 10% -.9411177 -1.445124 Obs 1,574 25% -.7014489 -1.43959 Sum of Wgt. 1,574 50% -.3049904 Mean .0000153 Largest Std. Dev. 1.000497 75% .4302664 3.716394 90% 1.438834 3.910809 Variance 1.000993 95% 2.065976 4.020359 Skewness 1.371514 99% 3.24163 4.31282 Kurtosis 4.72451 . swilk stdres Shapiro-Wilk W test for normal data Variable | Obs W V z Prob>z -------------+------------------------------------------------------ stdres | 1,574 0.87871 115.660 11.977 0.00000 . . **transforming wpc . xi:boxcox wpc hs100_ctr hs100_ctr_sq parity1 i.aut_calv i.twin i.dyst i.rp*i.vag_disch i.aut_calv _Iaut_calv_0-1 (naturally coded; _Iaut_calv_0 omitted) i.twin _Itwin_0-1 (naturally coded; _Itwin_0 omitted) i.dyst _Idyst_0-1 (naturally coded; _Idyst_0 omitted) i.rp _Irp_0-1 (naturally coded; _Irp_0 omitted) i.vag_disch _Ivag_disch_0-1 (naturally coded; _Ivag_disch_0 omitted) i.rp*i.vag_di~h _IrpXvag_#_# (coded as above) Fitting comparison model Iteration 0: log likelihood = -8439.9903 Iteration 1: log likelihood = -8082.9461 Iteration 2: log likelihood = -8035.2171 Iteration 3: log likelihood = -8034.9512 Iteration 4: log likelihood = -8034.9511 Fitting full model Iteration 0: log likelihood = -8382.2918 Iteration 1: log likelihood = -8022.9541 Iteration 2: log likelihood = -7958.3552 Iteration 3: log likelihood = -7957.985 Iteration 4: log likelihood = -7957.9849 Number of obs = 1,574 LR chi2(9) = 153.93 Log likelihood = -7957.9849 Prob > chi2 = 0.000 ------------------------------------------------------------------------------ wpc | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- /theta | .1097595 .0270646 4.06 0.000 .0567138 .1628052 ------------------------------------------------------------------------------ Estimates of scale-variant parameters ---------------------------- | Coef. -------------+-------------- Notrans | hs100_ctr | .5236879 hs100_ctr_sq | .316867 parity1 | .0207726 _Iaut_calv_1 | -.2119782 _Itwin_1 | .5994942 _Idyst_1 | .1803076 _Irp_1 | .1697644 _Ivag_disc~1 | -.0333896 _IrpXvag_1_1 | .6352271 _cons | 4.90172 -------------+-------------- /sigma | 1.119778 ---------------------------- --------------------------------------------------------- Test Restricted LR statistic P-value H0: log likelihood chi2 Prob > chi2 --------------------------------------------------------- theta = -1 -9664.734 3413.50 0.000 theta = 0 -7966.6087 17.25 0.000 theta = 1 -8382.2918 848.61 0.000 --------------------------------------------------------- . gen wpc_ln=ln(wpc) . reg wpc_ln hs100_ctr hs100_ctr_sq parity1 i.aut_calv i.twin i.dyst i.rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 18.05 Model | 86.9474063 9 9.66082293 Prob > F = 0.0000 Residual | 836.874538 1,564 .535086022 R-squared = 0.0941 -------------+---------------------------------- Adj R-squared = 0.0889 Total | 923.821945 1,573 .587299393 Root MSE = .7315 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | .343658 .0317233 10.83 0.000 .2814333 .4058827 hs100_ctr_sq | .2118728 .0456207 4.64 0.000 .1223885 .301357 parity1 | .0129844 .012586 1.03 0.302 -.0117028 .0376715 | aut_calv | yes | -.1371621 .0372127 -3.69 0.000 -.2101542 -.0641701 | twin | yes | .3927426 .1443661 2.72 0.007 .1095711 .6759141 | dyst | yes | .1109405 .0801013 1.39 0.166 -.0461768 .2680578 | rp | yes | .1123166 .0705612 1.59 0.112 -.0260878 .250721 | vag_disch | yes | -.0260135 .1050122 -0.25 0.804 -.231993 .1799661 | rp#vag_disch | yes#yes | .4136999 .183531 2.25 0.024 .0537072 .7736926 | _cons | 3.888587 .0386257 100.67 0.000 3.812823 3.96435 ------------------------------------------------------------------------------ . **model checking . capture drop fit stdres /* adjust if not defined */ . predict fit, xb . predict stdres, rstandard . *homoskedasticity . estat hettest Breusch-Pagan / Cook-Weisberg test for heteroskedasticity Ho: Constant variance Variables: fitted values of wpc_ln chi2(1) = 19.98 Prob > chi2 = 0.0000 . estat imtest Cameron & Trivedi's decomposition of IM-test --------------------------------------------------- Source | chi2 df p ---------------------+----------------------------- Heteroskedasticity | 48.63 44 0.2919 Skewness | 8.75 9 0.4605 Kurtosis | 0.52 1 0.4698 ---------------------+----------------------------- Total | 57.91 54 0.3332 --------------------------------------------------- . scatter stdres fit . *normality . qnorm stdres . hist stdres, normal (bin=31, start=-5.3996067, width=.25507099) . summ stdres, d Standardized residuals ------------------------------------------------------------- Percentiles Smallest 1% -2.211829 -5.399607 5% -1.564966 -4.989504 10% -1.242075 -3.61681 Obs 1,574 25% -.7143057 -3.316 Sum of Wgt. 1,574 50% -.0133218 Mean .000018 Largest Std. Dev. 1.000267 75% .7199247 2.358784 90% 1.333008 2.373729 Variance 1.000533 95% 1.628151 2.452677 Skewness -.1738834 99% 2.15899 2.507594 Kurtosis 3.382854 . swilk stdres Shapiro-Wilk W test for normal data Variable | Obs W V z Prob>z -------------+------------------------------------------------------ stdres | 1,574 0.98974 9.787 5.751 0.00000 . * check for collinearity issues . estat vif Variable | VIF 1/VIF -------------+---------------------- hs100_ctr | 1.14 0.878856 hs100_ctr_sq | 1.14 0.879714 parity1 | 1.04 0.962306 1.aut_calv | 1.02 0.985045 1.twin | 1.03 0.967484 1.dyst | 1.06 0.943538 1.rp | 1.26 0.796702 1.vag_disch | 1.60 0.624261 rp#vag_disch | 1 1 | 1.85 0.539810 -------------+---------------------- Mean VIF | 1.24 . estat vce, corr Correlation matrix of coefficients of regress model | 1. 1. 1. 1. 1. e(V) | hs100_~r hs100_~q parity1 aut_calv twin dyst rp vag_di~h -------------+-------------------------------------------------------------------------------- hs100_ctr | 1.0000 hs100_ctr_sq | 0.3269 1.0000 parity1 | -0.0415 -0.0468 1.0000 1.aut_calv | 0.0521 -0.0265 -0.0500 1.0000 1.twin | 0.0326 0.0464 -0.0666 -0.0487 1.0000 1.dyst | 0.1146 0.0717 0.1303 0.0023 -0.0214 1.0000 1.rp | 0.0230 0.0540 0.0318 0.0438 -0.0807 -0.0656 1.0000 1.vag_disch | 0.0304 0.0698 0.0649 0.0387 -0.0383 -0.1188 0.0742 1.0000 1.rp#| 1.vag_disch | -0.0184 -0.0571 -0.0644 -0.0024 -0.0429 0.0861 -0.3873 -0.5752 _cons | -0.1716 -0.4415 -0.5404 -0.4246 0.0004 -0.2091 -0.1974 -0.1711 | 1.rp# e(V) | 1.vag_~h _cons -------------+-------------------- 1.rp#| 1.vag_disch | 1.0000 _cons | 0.1133 1.0000 . . **linearity . * check modelling of herd_size and parity . lowess stdres herd_size . lowess stdres parity . * looking at herd_size without herd_size in the model . reg wpc_ln parity1 1.aut_calv 1.twin 1.dyst i.rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(7, 1566) = 5.82 Model | 23.4240047 7 3.34628639 Prob > F = 0.0000 Residual | 900.39794 1,566 .574966756 R-squared = 0.0254 -------------+---------------------------------- Adj R-squared = 0.0210 Total | 923.821945 1,573 .587299393 Root MSE = .75827 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- parity1 | .0191623 .0130272 1.47 0.142 -.0063904 .0447149 | aut_calv | yes | -.156154 .0384813 -4.06 0.000 -.2316343 -.0806737 | twin | yes | .3353984 .1494624 2.24 0.025 .0422309 .6285659 | dyst | yes | .0081531 .0824313 0.10 0.921 -.1535343 .1698404 | rp | yes | .0906877 .0730356 1.24 0.215 -.0525701 .2339455 | vag_disch | yes | -.0684004 .1085865 -0.63 0.529 -.2813906 .1445898 | rp#vag_disch | yes#yes | .4618901 .1899367 2.43 0.015 .089333 .8344472 | _cons | 3.978786 .0359075 110.81 0.000 3.908354 4.049218 ------------------------------------------------------------------------------ . predict stdres1, rstandard . lowess stdres1 herd_size . * looking at parity without parity in the model . reg wpc_ln hs100_ctr hs100_ctr_sq 1.aut_calv 1.twin 1.dyst i.rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(8, 1565) = 20.18 Model | 86.3779092 8 10.7972387 Prob > F = 0.0000 Residual | 837.444036 1,565 .53510801 R-squared = 0.0935 -------------+---------------------------------- Adj R-squared = 0.0889 Total | 923.821945 1,573 .587299393 Root MSE = .73151 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | .3450169 .0316966 10.88 0.000 .2828447 .4071892 hs100_ctr_sq | .2140753 .0455717 4.70 0.000 .1246873 .3034633 | aut_calv | yes | -.1352417 .0371669 -3.64 0.000 -.2081438 -.0623395 | twin | yes | .4026686 .1440481 2.80 0.005 .1201211 .6852162 | dyst | yes | .1001739 .0794202 1.26 0.207 -.0556073 .2559551 | rp | yes | .1099998 .0705269 1.56 0.119 -.0283373 .2483368 | vag_disch | yes | -.0330431 .1047931 -0.32 0.753 -.2385927 .1725065 | rp#vag_disch | yes#yes | .4258954 .1831536 2.33 0.020 .066643 .7851478 | _cons | 3.910122 .0324999 120.31 0.000 3.846374 3.97387 ------------------------------------------------------------------------------ . predict stdres2, rstandard . lowess stdres2 parity1 . . **estimates and CIs backtransformed from ln-scale . reg wpc_ln hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst i.rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 18.05 Model | 86.9474063 9 9.66082293 Prob > F = 0.0000 Residual | 836.874538 1,564 .535086022 R-squared = 0.0941 -------------+---------------------------------- Adj R-squared = 0.0889 Total | 923.821945 1,573 .587299393 Root MSE = .7315 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | .343658 .0317233 10.83 0.000 .2814333 .4058827 hs100_ctr_sq | .2118728 .0456207 4.64 0.000 .1223885 .301357 parity1 | .0129844 .012586 1.03 0.302 -.0117028 .0376715 aut_calv | -.1371621 .0372127 -3.69 0.000 -.2101542 -.0641701 twin | .3927426 .1443661 2.72 0.007 .1095711 .6759141 dyst | .1109405 .0801013 1.39 0.166 -.0461768 .2680578 | rp | yes | .1123166 .0705612 1.59 0.112 -.0260878 .250721 | vag_disch | yes | -.0260135 .1050122 -0.25 0.804 -.231993 .1799661 | rp#vag_disch | yes#yes | .4136999 .183531 2.25 0.024 .0537072 .7736926 | _cons | 3.888587 .0386257 100.67 0.000 3.812823 3.96435 ------------------------------------------------------------------------------ . *prediction for cows from avg herd size herds with dyst, parity=3, . *calv in fall, single calving . margins , over(rp vag_disch) at(hs100_ctr=0 hs100_ctr_sq=0 dyst=1 parity1=2 aut_calv=1 twin=0) > expression(exp(predict(xb))) Predictive margins Number of obs = 1,574 Model VCE : OLS Expression : exp(predict(xb)) over : rp vag_disch at : 0.rp#0.vag_disch hs100_ctr = 0 hs100_ctr_sq = 0 parity1 = 2 aut_calv = 1 twin = 0 dyst = 1 0.rp#1.vag_disch hs100_ctr = 0 hs100_ctr_sq = 0 parity1 = 2 aut_calv = 1 twin = 0 dyst = 1 1.rp#0.vag_disch hs100_ctr = 0 hs100_ctr_sq = 0 parity1 = 2 aut_calv = 1 twin = 0 dyst = 1 1.rp#1.vag_disch hs100_ctr = 0 hs100_ctr_sq = 0 parity1 = 2 aut_calv = 1 twin = 0 dyst = 1 ------------------------------------------------------------------------------ | Delta-method | Margin Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- rp#vag_disch | no#no | 48.82946 4.028419 12.12 0.000 40.9339 56.72502 no#yes | 47.57561 5.844969 8.14 0.000 36.11969 59.03154 yes#no | 54.63367 5.547506 9.85 0.000 43.76076 65.50658 yes#yes | 80.50641 12.65005 6.36 0.000 55.71278 105.3001 ------------------------------------------------------------------------------ . marginsplot, noci Variables that uniquely identify margins: rp vag_disch . . **residuals and diagnostics . reg wpc_ln hs100_ctr hs100_ctr_sq parity1 1.aut_calv 1.twin 1.dyst i.rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 18.05 Model | 86.9474063 9 9.66082293 Prob > F = 0.0000 Residual | 836.874538 1,564 .535086022 R-squared = 0.0941 -------------+---------------------------------- Adj R-squared = 0.0889 Total | 923.821945 1,573 .587299393 Root MSE = .7315 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | .343658 .0317233 10.83 0.000 .2814333 .4058827 hs100_ctr_sq | .2118728 .0456207 4.64 0.000 .1223885 .301357 parity1 | .0129844 .012586 1.03 0.302 -.0117028 .0376715 | aut_calv | yes | -.1371621 .0372127 -3.69 0.000 -.2101542 -.0641701 | twin | yes | .3927426 .1443661 2.72 0.007 .1095711 .6759141 | dyst | yes | .1109405 .0801013 1.39 0.166 -.0461768 .2680578 | rp | yes | .1123166 .0705612 1.59 0.112 -.0260878 .250721 | vag_disch | yes | -.0260135 .1050122 -0.25 0.804 -.231993 .1799661 | rp#vag_disch | yes#yes | .4136999 .183531 2.25 0.024 .0537072 .7736926 | _cons | 3.888587 .0386257 100.67 0.000 3.812823 3.96435 ------------------------------------------------------------------------------ . capture drop fit stdres /* adjust if not defined */ . capture drop delres- dfb_int . predict fit, xb . predict stdres, rstandard . predict delres, rstudent . predict lev, lev . predict cook, cooksd . predict dfit, dfit . predict dfb_hs2, dfbeta(hs100_ctr_sq) . predict dfb_dyst, dfbeta(1.dyst) . predict dfb_rp, dfbeta(1.rp) . predict dfb_vd, dfbeta(1.vag_disch) . predict dfb_int, dfbeta(1.rp#1.vag_disch) . scalar nobs=1574 . scalar nparam=10 . foreach var in wpc_ln fit stdres lev cook dfit dfb_hs2 dfb_dyst dfb_rp dfb_vd dfb_int { 2. format `var' %5.3f 3. } . . * examine outliers . * number of standardized residuals outside -2/+2 and -3/+3 . * expect 5% (n=79) outside -2,+2 and 1% (n=14) outside -3,+3 . count if abs(stdres)>2 /* n=50 */ 50 . count if abs(stdres)>3 /* n=6, so fewer very extreme residual values than expected */ 6 . sum delres, d Studentized residuals ------------------------------------------------------------- Percentiles Smallest 1% -2.214588 -5.448908 5% -1.565692 -5.028087 10% -1.242291 -3.630869 Obs 1,574 25% -.7141938 -3.326655 Sum of Wgt. 1,574 50% -.0133175 Mean -.0000384 Largest Std. Dev. 1.001035 75% .7198138 2.362236 90% 1.33334 2.377255 Variance 1.002071 95% 1.629012 2.456621 Skewness -.1785964 99% 2.161524 2.511847 Kurtosis 3.415061 . * outlier test based on deletion residuals . display 2*nobs*ttail(nobs-nparam-1, 5.02) /* P=0.0009 so S */ .0009064 . display invttail(nobs-nparam-1,.025/nobs) /* values >= abs(4.17) outliers*/ 4.1726361 . br if abs(delres) >=4 . **there are two observations with extreme residual values (outliers) . ** two cows with 1 days interval from the end of wp to conception . ** cows 2272 (herd 106) and cow 1032 (herd 4) . . * browse most extreme residuals . sort stdres . browse wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres in 1/10 / > * most extreme negative residuals */ . browse wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres in -10/-1 > /* most extreme positive residuals */ . . sort delres . browse wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres in 1/5 /* > most extreme negative residuals */ . browse wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres in -5/-1 > /* most extreme positive residuals */ . . * leverage and influence diagnostics . summ lev ,d Leverage ------------------------------------------------------------- Percentiles Smallest 1% .0016636 .0016636 5% .0018854 .0016636 10% .0020899 .0016636 Obs 1,574 25% .0023828 .0016636 Sum of Wgt. 1,574 50% .0031862 Mean .0063532 Largest Std. Dev. .0083435 75% .007048 .0633344 90% .0124777 .0636456 Variance .0000696 95% .0214789 .0637559 Skewness 3.50352 99% .0444552 .0642237 Kurtosis 17.16434 . display "leverage cutoff: " 2*nparam/nobs leverage cutoff: .01270648 . display "conservative leverage cutoff: " 3*nparam/nobs conservative leverage cutoff: .01905972 . * large leverage values . count if lev>=.01905972 /* many (n=108) high leverage values */ 108 . gsort -lev . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev in 1/10, clean > noobs wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h stdres > lev 76 4.331 4.699 no 185 4 yes no yes yes -0.520 > 0.064 45 3.807 4.577 yes 201 4 yes no yes yes -1.089 > 0.064 137 4.920 4.994 no 294 2 yes no yes yes -0.105 > 0.064 94 4.543 4.865 no 263 3 yes no yes yes -0.454 > 0.063 53 3.970 4.227 yes 263 5 yes no no yes -0.362 > 0.059 110 4.700 4.162 yes 263 0 yes no no yes 0.758 > 0.058 32 3.466 4.262 yes 201 1 yes yes yes no -1.117 > 0.052 99 4.595 4.592 no 294 1 yes yes no no 0.004 > 0.051 23 3.135 4.121 yes 185 0 yes yes no no -1.381 > 0.050 40 3.689 4.407 yes 263 0 no yes yes yes -1.005 > 0.046 . tab3way twin rp dyst Table entries are cell frequencies Missing categories ignored ------------------------------------ | Dystocia at calving and | Retained placenta at | calving Twins | --- no --- --- yes -- born | no yes no yes ----------+------------------------- no | 1332 124 76 15 yes | 15 9 2 1 ------------------------------------ . * large Cook's D values . summ cook,d Cook's D ------------------------------------------------------------- Percentiles Smallest 1% 8.65e-08 5.57e-10 5% 1.44e-06 5.86e-09 10% 6.87e-06 1.08e-08 Obs 1,574 25% .0000447 1.16e-08 Sum of Wgt. 1,574 50% .000199 Mean .0006351 Largest Std. Dev. .0013679 75% .0006061 .0120243 90% .0014951 .0133958 Variance 1.87e-06 95% .0029054 .0141347 Skewness 5.314337 99% .0068727 .0174304 Kurtosis 42.05991 . display "Cook's D cutoff: " 4/nobs Cook's D cutoff: .0025413 . count if cook>=.0025413 /* many (n=92) high Cook's D values */ 92 . gsort -cook . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev cook in 1/10, > clean noobs /* most extreme Cook's D values */ wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h stdres > lev cook 14 2.639 4.121 yes 235 2 yes no no no -2.066 > 0.039 0.017 11 2.398 4.031 no 263 1 no yes no yes -2.263 > 0.027 0.014 205 5.323 3.766 no 125 0 no no no yes 2.159 > 0.028 0.013 11 2.398 3.881 yes 263 0 no yes no yes -2.056 > 0.028 0.012 38 3.638 4.758 no 333 4 yes no no no -1.565 > 0.043 0.011 25 3.219 4.446 no 263 1 no no yes yes -1.708 > 0.035 0.011 23 3.135 4.121 yes 185 0 yes yes no no -1.381 > 0.050 0.010 229 5.434 3.925 yes 294 1 no no no yes 2.084 > 0.021 0.009 213 5.361 4.254 no 185 0 no no yes yes 1.542 > 0.036 0.009 45 3.807 4.577 yes 201 4 yes no yes yes -1.089 > 0.064 0.008 . * large DFIT values . summ dfit, d Dfits ------------------------------------------------------------- Percentiles Smallest 1% -.2314434 -.4179343 5% -.1251009 -.3764578 10% -.0824913 -.3471186 Obs 1,574 25% -.0435816 -.3314645 Sum of Wgt. 1,574 50% -.000781 Mean .0003931 Largest Std. Dev. .0797594 75% .0452949 .2795224 90% .0887097 .298769 Variance .0063616 95% .120787 .3084654 Skewness -.1148466 99% .2251135 .3664328 Kurtosis 5.645826 . display "DFITS cutoff: " 2*sqrt(nparam/nobs)*(nobs>=120)+1*(nobs<120) DFITS cutoff: .15941443 . count if abs(dfit)>=.15941443 /* many (n=92) high DFITS values */ 92 . sort dfit . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev cook dfit in 1 > /10 , clean/* most extreme negative DFITS values */ wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h stdr > es lev cook dfit 1. 14 2.639 4.121 yes 235 2 yes no no no -2.0 > 66 0.039 0.017 -0.418 2. 11 2.398 4.031 no 263 1 no yes no yes -2.2 > 63 0.027 0.014 -0.376 3. 11 2.398 3.881 yes 263 0 no yes no yes -2.0 > 56 0.028 0.012 -0.347 4. 38 3.638 4.758 no 333 4 yes no no no -1.5 > 65 0.043 0.011 -0.331 5. 25 3.219 4.446 no 263 1 no no yes yes -1.7 > 08 0.035 0.011 -0.325 6. 23 3.135 4.121 yes 185 0 yes yes no no -1.3 > 81 0.050 0.010 -0.316 7. 45 3.807 4.577 yes 201 4 yes no yes yes -1.0 > 89 0.064 0.008 -0.284 8. 31 3.434 4.485 no 263 4 no no yes yes -1.4 > 63 0.036 0.008 -0.283 9. 14 2.639 3.839 no 185 0 no yes no yes -1.6 > 63 0.027 0.008 -0.277 10. 32 3.466 4.262 yes 201 1 yes yes yes no -1.1 > 17 0.052 0.007 -0.262 . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev cook dfit in - > 10/-1, clean /* most extreme positive DFITS values */ wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h std > res lev cook dfit 1565. 218 5.384 3.728 yes 185 0 no yes no no 2. > 279 0.012 0.007 0.257 1566. 170 5.136 4.018 no 263 0 no yes no yes 1. > 549 0.027 0.007 0.257 1567. 205 5.323 4.050 no 294 0 no no no yes 1. > 759 0.021 0.007 0.258 1568. 253 5.533 3.920 no 201 3 no yes no no 2. > 221 0.013 0.007 0.259 1569. 149 5.004 3.920 yes 263 3 no yes no yes 1. > 505 0.030 0.007 0.263 1570. 240 5.481 3.767 yes 185 3 no yes no no 2. > 359 0.013 0.008 0.275 1571. 232 5.447 3.795 yes 201 4 no yes no no 2. > 274 0.015 0.008 0.280 1572. 213 5.361 4.254 no 185 0 no no yes yes 1. > 542 0.036 0.009 0.299 1573. 229 5.434 3.925 yes 294 1 no no no yes 2. > 084 0.021 0.009 0.308 1574. 205 5.323 3.766 no 125 0 no no no yes 2. > 159 0.028 0.013 0.366 . * dfbeta diagnostics (for a subset of predictors) . display "DFBETA cutoff: " 2/sqrt(nobs)*(nobs>=120)+1*(nobs<120) DFBETA cutoff: .05041127 . count if abs(dfb_dyst)>.05041127 /* n=74 */ 74 . sum dfb_hs2 dfb_dyst dfb_rp dfb_vd dfb_int //only for hsize2 since hsize will be quite > collinear and difficult to interpret// Variable | Obs Mean Std. Dev. Min Max -------------+--------------------------------------------------------- dfb_hs2 | 1,574 9.10e-08 .0239995 -.1043 .1487626 dfb_dyst | 1,574 -.0000162 .0299013 -.2078438 .2472617 dfb_rp | 1,574 4.82e-07 .0254968 -.1650124 .1895015 dfb_vd | 1,574 2.40e-06 .029381 -.2821976 .3064255 dfb_int | 1,574 -7.50e-06 .0236208 -.2360168 .2135575 . sort dfb_dyst . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres lev > cook dfit dfb_dyst in 1/10, clean /* most extreme negative dfb_dyst values */ wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h stdr > es delres lev cook dfit dfb_dyst 1. 15 2.708 4.186 no 294 0 no yes no no -2.0 > 34 -2.036158 0.013 0.005 -0.231 -0.208 2. 11 2.398 4.031 no 263 1 no yes no yes -2.2 > 63 -2.265854 0.027 0.014 -0.376 -0.194 3. 14 2.639 4.044 no 263 0 no yes no no -1.9 > 32 -1.934105 0.012 0.005 -0.217 -0.193 4. 11 2.398 3.881 yes 263 0 no yes no yes -2.0 > 56 -2.057768 0.028 0.012 -0.347 -0.172 5. 18 2.890 4.156 no 263 0 no yes yes no -1.7 > 46 -1.747611 0.018 0.006 -0.239 -0.164 6. 19 2.944 4.058 yes 263 3 no yes yes no -1.5 > 38 -1.538602 0.020 0.005 -0.222 -0.155 7. 16 2.773 3.881 no 201 0 no yes no no -1.5 > 24 -1.524797 0.012 0.003 -0.170 -0.149 8. 20 2.996 4.044 no 263 0 no yes no no -1.4 > 42 -1.442262 0.012 0.003 -0.162 -0.144 9. 24 3.178 4.101 yes 294 4 no yes no no -1.2 > 72 -1.272148 0.015 0.003 -0.159 -0.142 10. 20 2.996 4.075 no 235 1 no yes yes no -1.4 > 89 -1.489925 0.018 0.004 -0.202 -0.141 . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres lev > cook dfit dfb_dyst in -10/-1 /* most extreme positive dfb_dyst values */ +------------------------------------------------------------------------------------+ 1565. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 184 | 5.215 | 3.950 | no | 235 | 0 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 1.740 | 1.741162 | 0.012 | 0.004 | 0.194 | 0.171 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1566. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 169 | 5.130 | 3.946 | yes | 263 | 3 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 1.630 | 1.631035 | 0.014 | 0.004 | 0.192 | 0.174 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1567. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 153 | 5.030 | 3.728 | yes | 185 | 0 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 1.792 | 1.793212 | 0.012 | 0.004 | 0.202 | 0.175 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1568. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 178 | 5.182 | 3.933 | no | 201 | 4 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 1.721 | 1.721754 | 0.015 | 0.005 | 0.213 | 0.184 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1569. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 178 | 5.182 | 3.839 | yes | 235 | 2 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 1.848 | 1.849122 | 0.013 | 0.004 | 0.209 | 0.191 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1570. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 218 | 5.384 | 3.728 | yes | 185 | 0 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 2.279 | 2.282074 | 0.012 | 0.007 | 0.257 | 0.223 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1571. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 214 | 5.366 | 3.741 | yes | 185 | 1 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 2.235 | 2.238212 | 0.012 | 0.006 | 0.249 | 0.224 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1572. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 253 | 5.533 | 3.920 | no | 201 | 3 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 2.221 | 2.223887 | 0.013 | 0.007 | 0.259 | 0.232 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1573. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 232 | 5.447 | 3.795 | yes | 201 | 4 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 2.274 | 2.277405 | 0.015 | 0.008 | 0.280 | 0.244 | +------------------------------------------------------------------------------------+ +------------------------------------------------------------------------------------+ 1574. | wpc | wpc_ln | fit | aut_calv | herd_s~e | parity1 | twin | dyst | rp | vag_di~h | | 240 | 5.481 | 3.767 | yes | 185 | 3 | no | yes | no | no | |--------------------------------------------------------------------+---------------| | stdres | delres | lev | cook | dfit | dfb_dyst | | 2.359 | 2.362236 | 0.013 | 0.008 | 0.275 | 0.247 | +------------------------------------------------------------------------------------+ . graph box dfb_hs2 dfb_dyst dfb_rp dfb_vd dfb_int, yline(-0.05 0.05) . . * same approach for other DFBETAs . * dfbeta diagnostic for hsize2 . sort dfb_hs2 . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres lev cook df > it dfb_hs2 in 1/10, clean /* most extreme negative dfb_dyst values */ wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h stdre > s delres lev cook dfit dfb_hs2 1. 12 2.485 3.792 no 125 0 no no no no -1.79 > 4 -1.795646 0.008 0.003 -0.165 -0.104 2. 12 2.485 3.792 no 125 0 no no no no -1.79 > 4 -1.795646 0.008 0.003 -0.165 -0.104 3. 12 2.485 3.668 yes 125 1 no no no no -1.62 > 3 -1.623839 0.007 0.002 -0.139 -0.091 4. 15 2.708 3.818 no 125 2 no no no no -1.52 > 3 -1.523598 0.007 0.002 -0.132 -0.086 5. 13 2.565 3.681 yes 125 2 no no no no -1.53 > 1 -1.531436 0.007 0.002 -0.129 -0.084 6. 13 2.565 3.707 yes 125 4 no no no no -1.56 > 7 -1.568122 0.008 0.002 -0.143 -0.084 7. 16 2.773 3.792 no 125 0 no no no no -1.39 > 9 -1.399855 0.008 0.002 -0.129 -0.081 8. 14 2.639 3.655 yes 125 0 no no no no -1.39 > 4 -1.394688 0.008 0.002 -0.127 -0.079 9. 17 2.833 3.805 no 125 1 no no no no -1.33 > 4 -1.333839 0.008 0.001 -0.117 -0.076 10. 22 3.091 3.955 no 125 4 no yes no no -1.19 > 3 -1.19337 0.021 0.003 -0.173 -0.075 . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres lev cook df > it dfb_hs2 in -10/-1, clean /* most extreme positive dfb_dyst values */ wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h stdr > es delres lev cook dfit dfb_hs2 1565. 130 4.868 3.681 yes 125 2 no no no no 1.6 > 28 1.629012 0.007 0.002 0.137 0.090 1566. 126 4.836 3.655 yes 125 0 no no no no 1.6 > 22 1.622661 0.008 0.002 0.147 0.092 1567. 127 4.844 3.655 yes 125 0 no no no no 1.6 > 33 1.633537 0.008 0.002 0.148 0.093 1568. 138 4.927 3.668 yes 125 1 no no no no 1.7 > 28 1.729221 0.007 0.002 0.148 0.097 1569. 3 1.099 3.741 yes 235 3 no no no no -3.6 > 17 -3.630869 0.003 0.003 -0.186 0.101 1570. 1 0.000 3.946 no 263 1 no no no no -5.4 > 00 -5.448908 0.002 0.006 -0.243 0.117 1571. 208 5.338 3.805 no 125 1 no no no no 2.1 > 03 2.105534 0.008 0.003 0.185 0.121 1572. 253 5.533 3.707 yes 125 4 no no no no 2.5 > 08 2.511847 0.008 0.005 0.230 0.134 1573. 234 5.455 3.668 yes 125 1 no no no no 2.4 > 53 2.456621 0.007 0.004 0.211 0.137 1574. 205 5.323 3.766 no 125 0 no no no yes 2.1 > 59 2.161524 0.028 0.013 0.366 0.149 . * dfbeta diagnostic for hsize . * ideally we would need to create two terms that are independent in order to assess dfbeta for > hsize and hsize2 . * I am not going to do this here, instead I will fit a model with only hsize and look at the df > beta for the linear term . regress wpc_ln aut_calv hs100_ctr parity1 twin dyst rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(8, 1565) = 17.39 Model | 75.4062567 8 9.42578209 Prob > F = 0.0000 Residual | 848.415688 1,565 .54211865 R-squared = 0.0816 -------------+---------------------------------- Adj R-squared = 0.0769 Total | 923.821945 1,573 .587299393 Root MSE = .73629 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- aut_calv | -.1325884 .0374433 -3.54 0.000 -.2060328 -.059144 hs100_ctr | .2955003 .0301771 9.79 0.000 .2363085 .3546921 parity1 | .0157198 .0126545 1.24 0.214 -.0091018 .0405414 twin | .3616024 .1451549 2.49 0.013 .0768838 .6463209 dyst | .0842735 .0804186 1.05 0.295 -.073466 .242013 | rp | yes | .094625 .0709198 1.33 0.182 -.0444827 .2337327 | vag_disch | yes | -.0600363 .1054425 -0.57 0.569 -.2668598 .1467872 | rp#vag_disch | yes#yes | .4623984 .1844314 2.51 0.012 .1006398 .824157 | _cons | 3.967781 .0348848 113.74 0.000 3.899356 4.036207 ------------------------------------------------------------------------------ . predict dfb_hs, dfbeta(hs100_ctr) . summ dfb_hs Variable | Obs Mean Std. Dev. Min Max -------------+--------------------------------------------------------- dfb_hs | 1,574 -.0000113 .0239101 -.1423404 .0976687 . sort dfb_hs . list wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev cook dfit dfb_ > hs in 1/10, clean /* most extreme negative dfb_dyst values */ wpc wpc_ln fit aut_calv herd_s~e parity1 twin dyst rp vag_di~h stdre > s lev cook dfit dfb_hs 1. 253 5.533 3.707 yes 125 4 no no no no 2.50 > 8 0.008 0.005 0.230 -.1423404 2. 234 5.455 3.668 yes 125 1 no no no no 2.45 > 3 0.007 0.004 0.211 -.1360926 3. 205 5.323 3.766 no 125 0 no no no yes 2.15 > 9 0.028 0.013 0.366 -.1294927 4. 208 5.338 3.805 no 125 1 no no no no 2.10 > 3 0.008 0.003 0.185 -.1265206 5. 138 4.927 3.668 yes 125 1 no no no no 1.72 > 8 0.007 0.002 0.148 -.0996296 6. 139 4.934 3.805 no 125 1 no no no no 1.55 > 0 0.008 0.002 0.136 -.0969104 7. 130 4.868 3.681 yes 125 2 no no no no 1.62 > 8 0.007 0.002 0.137 -.0953079 8. 127 4.844 3.655 yes 125 0 no no no no 1.63 > 3 0.008 0.002 0.148 -.0941594 9. 126 4.836 3.655 yes 125 0 no no no no 1.62 > 2 0.008 0.002 0.147 -.0936193 10. 108 4.682 3.668 yes 125 1 no no no no 1.39 > 2 0.007 0.001 0.119 -.0827399 . browse wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev cook dfit df > b_hs in -10/-1 /* most extreme positive dfb_dyst values */ . . *analysis outliers observations . list wpc wpc_ln fit herd_size parity dyst rp vag_disch if wpc==1, clean compress noobs wpc wpc~n fit her~e par~y dyst rp vag~h 1 0.000 3.946 263 2 no no no 1 0.000 3.646 201 2 no no no . list wpc wpc_ln delres lev dfit cook dfb_dyst dfb_rp dfb_vd dfb_int dfb_hs if wpc==1, clean co > mpress noobs wpc wpc~n delres lev dfit cook df~st dfb~p dfb~d dfb~nt dfb_hs > 1 0.000 -5.448908 0.002 -0.243 0.006 0.044 0.053 0.042 -0.026 -.0160816 > 1 0.000 -5.028087 0.002 -0.243 0.006 0.051 0.037 0.028 -0.021 .0976687 > . . *original final model . reg wpc_ln hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 18.05 Model | 86.9474063 9 9.66082293 Prob > F = 0.0000 Residual | 836.874538 1,564 .535086022 R-squared = 0.0941 -------------+---------------------------------- Adj R-squared = 0.0889 Total | 923.821945 1,573 .587299393 Root MSE = .7315 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | .343658 .0317233 10.83 0.000 .2814333 .4058827 hs100_ctr_sq | .2118728 .0456207 4.64 0.000 .1223885 .301357 parity1 | .0129844 .012586 1.03 0.302 -.0117028 .0376715 aut_calv | -.1371621 .0372127 -3.69 0.000 -.2101542 -.0641701 twin | .3927426 .1443661 2.72 0.007 .1095711 .6759141 dyst | .1109405 .0801013 1.39 0.166 -.0461768 .2680578 | rp | yes | .1123166 .0705612 1.59 0.112 -.0260878 .250721 | vag_disch | yes | -.0260135 .1050122 -0.25 0.804 -.231993 .1799661 | rp#vag_disch | yes#yes | .4136999 .183531 2.25 0.024 .0537072 .7736926 | _cons | 3.888587 .0386257 100.67 0.000 3.812823 3.96435 ------------------------------------------------------------------------------ . estimate store ln . . *re-fit without outliers/influential obs . reg wpc_ln hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst rp##vag_disch if wpc~=1 Source | SS df MS Number of obs = 1,572 -------------+---------------------------------- F(9, 1562) = 18.15 Model | 84.5110938 9 9.39012153 Prob > F = 0.0000 Residual | 807.934865 1,562 .517243831 R-squared = 0.0947 -------------+---------------------------------- Adj R-squared = 0.0895 Total | 892.445959 1,571 .568075085 Root MSE = .7192 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | .3391632 .031199 10.87 0.000 .2779669 .4003596 hs100_ctr_sq | .2026965 .0448706 4.52 0.000 .1146836 .2907094 parity1 | .0113333 .0123763 0.92 0.360 -.0129427 .0356093 aut_calv | -.1369515 .0366092 -3.74 0.000 -.2087598 -.0651432 twin | .3894441 .1419397 2.74 0.006 .1110316 .6678566 dyst | .1033889 .0787611 1.31 0.189 -.0510998 .2578777 | rp | yes | .1060341 .0693799 1.53 0.127 -.0300535 .2421216 | vag_disch | yes | -.0332815 .1032512 -0.32 0.747 -.2358071 .1692441 | rp#vag_disch | yes#yes | .4223009 .1804488 2.34 0.019 .0683535 .7762484 | _cons | 3.901024 .0380169 102.61 0.000 3.826455 3.975594 ------------------------------------------------------------------------------ . estimate store ln_noout . . estat hettest Breusch-Pagan / Cook-Weisberg test for heteroskedasticity Ho: Constant variance Variables: fitted values of wpc_ln chi2(1) = 16.22 Prob > chi2 = 0.0001 . estat imtest Cameron & Trivedi's decomposition of IM-test --------------------------------------------------- Source | chi2 df p ---------------------+----------------------------- Heteroskedasticity | 71.76 44 0.0051 Skewness | 14.82 9 0.0960 Kurtosis | 9.69 1 0.0019 ---------------------+----------------------------- Total | 96.27 54 0.0004 --------------------------------------------------- . capture drop fit_group . capture drop fit stdres . capture drop dfit cook delres . predict fit, xb . predict stdres, rstandard . predict delres, rstudent . predict dfit, dfit (2 missing values generated) . predict cook, cook . scalar nobs=1572 . scalar nparam=10 . . hist stdres (bin=31, start=-5.5060368, width=.26001653) . egen fit_group=cut(fit), group(10) . tabstat stdres, statistics( mean sd count ) by(fit_group) Summary for variables: stdres by categories of: fit_group fit_group | mean sd N ----------+------------------------------ 0 | -.1127984 1.068298 152 1 | -.0052168 1.078422 158 2 | .0429111 1.119622 160 3 | -.0532808 1.040071 125 4 | .0708433 1.016679 191 5 | .042046 1.083892 154 6 | -.0311166 1.100208 133 7 | -.0623551 .9994817 185 8 | .0161114 .8454157 138 9 | .0035924 .8149584 178 ----------+------------------------------ Total | -.0067148 1.017397 1574 ----------------------------------------- . sort delres . browse cow wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres cook > dfit in 1/10 . * outlier test based on deletion residuals . display invttail(nobs-nparam-1,.025/nobs) /* values >= abs(4.17) outliers*/ 4.1723589 . count if abs(delres) >=4.17 2 . sort dfit . browse cow wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres cook > dfit in 1/10 . gsort -cook . browse cow wpc wpc_ln fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres cook > dfit in 1/10 . *the two outliers don't have the highest infl. values . . estimate table ln ln_noout, //models are very similar since outliers did not have much influen > ce ---------------------------------------- Variable | ln ln_noout -------------+-------------------------- hs100_ctr | .34365799 .33916323 hs100_ctr_sq | .21187276 .20269649 parity1 | .01298436 .0113333 aut_calv | -.13716214 -.13695147 twin | .39274261 .3894441 dyst | .1109405 .10338895 | rp | yes | .11231661 .10603408 | vag_disch | yes | -.02601346 -.03328151 | rp#vag_disch | yes#yes | .41369992 .42230092 | _cons | 3.8885867 3.9010244 ---------------------------------------- . estimate table ln ln_noout, se //models are very similar since outliers did not have much influ > ence ---------------------------------------- Variable | ln ln_noout -------------+-------------------------- hs100_ctr | .34365799 .33916323 | .03172331 .031199 hs100_ctr_sq | .21187276 .20269649 | .04562075 .04487057 parity1 | .01298436 .0113333 | .01258597 .01237635 aut_calv | -.13716214 -.13695147 | .0372127 .03660917 twin | .39274261 .3894441 | .14436609 .14193974 dyst | .1109405 .10338895 | .08010133 .07876115 | rp | yes | .11231661 .10603408 | .07056115 .0693799 | vag_disch | yes | -.02601346 -.03328151 | .10501221 .10325122 | rp#vag_disch | yes#yes | .41369992 .42230092 | .18353098 .18044883 | _cons | 3.8885867 3.9010244 | .03862574 .03801689 ---------------------------------------- legend: b/se . . *other possible transformation sqroot /*recommended in VER*/ . use daisy2red_01, clear . gen wpc_sqrt=sqrt(wpc) . regress wpc_sqrt hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 16.19 Model | 1133.93223 9 125.99247 Prob > F = 0.0000 Residual | 12172.3346 1,564 7.78282264 R-squared = 0.0852 -------------+---------------------------------- Adj R-squared = 0.0800 Total | 13306.2668 1,573 8.45916519 Root MSE = 2.7898 ------------------------------------------------------------------------------ wpc_sqrt | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | 1.230168 .120986 10.17 0.000 .992856 1.46748 hs100_ctr_sq | .7085556 .1739879 4.07 0.000 .3672814 1.04983 parity1 | .058304 .0480002 1.21 0.225 -.0358476 .1524556 aut_calv | -.5136529 .1419214 -3.62 0.000 -.7920293 -.2352766 twin | 1.385453 .5505819 2.52 0.012 .3054964 2.465409 dyst | .5422828 .3054896 1.78 0.076 -.0569296 1.141495 | rp | yes | .3894596 .2691054 1.45 0.148 -.1383858 .917305 | vag_disch | yes | -.0132839 .4004945 -0.03 0.974 -.7988467 .7722788 | rp#vag_disch | yes#yes | 1.491019 .6999486 2.13 0.033 .1180826 2.863956 | _cons | 7.516653 .1473104 51.03 0.000 7.227706 7.805599 ------------------------------------------------------------------------------ . estimate store sqrt . . capture drop fit stdres . capture drop delres . capture drop lev . capture drop dfit . capture drop cook . predict fit, xb . predict stdres, rstandard . predict delres, rstudent . predict lev, lev . predict cook, cooksd . predict dfit, dfit . . summ stdres delres Variable | Obs Mean Std. Dev. Min Max -------------+--------------------------------------------------------- stdres | 1,574 .000017 1.00045 -2.415783 3.273688 delres | 1,574 .0002416 1.001088 -2.419529 3.283912 . capture drop fit_group . egen fit_group=cut(fit), group(10) . tabstat stdres, statistics( mean sd count ) by(fit_group) Summary for variables: stdres by categories of: fit_group fit_group | mean sd N ----------+------------------------------ 0 | -.0632477 .8887984 149 1 | .0339114 1.010991 163 2 | -.0109345 .9917497 152 3 | .0406985 1.001835 160 4 | -.023528 .9507282 147 5 | -.0094748 1.005172 168 6 | .1685428 1.16272 161 7 | -.1452491 .9772877 150 8 | -.0213419 1.013437 142 9 | .0099244 .9711411 182 ----------+------------------------------ Total | .000017 1.00045 1574 ----------------------------------------- . . scatter stdres fit . estat hettest Breusch-Pagan / Cook-Weisberg test for heteroskedasticity Ho: Constant variance Variables: fitted values of wpc_sqrt chi2(1) = 0.40 Prob > chi2 = 0.5294 . estat imtest Cameron & Trivedi's decomposition of IM-test --------------------------------------------------- Source | chi2 df p ---------------------+----------------------------- Heteroskedasticity | 66.83 44 0.0148 Skewness | 147.13 9 0.0000 Kurtosis | 0.01 1 0.9201 ---------------------+----------------------------- Total | 213.97 54 0.0000 --------------------------------------------------- . hist stdres, normal (bin=31, start=-2.4157834, width=.18353134) . summ stdres, d Standardized residuals ------------------------------------------------------------- Percentiles Smallest 1% -1.698995 -2.415783 5% -1.294329 -2.018058 10% -1.125093 -1.890515 Obs 1,574 25% -.7864686 -1.885703 Sum of Wgt. 1,574 50% -.1844709 Mean .000017 Largest Std. Dev. 1.00045 75% .5989226 2.986266 90% 1.44447 3.088216 Variance 1.000899 95% 1.921609 3.115917 Skewness .7002212 99% 2.785691 3.273688 Kurtosis 2.989305 . summ delres,d Studentized residuals ------------------------------------------------------------- Percentiles Smallest 1% -1.700021 -2.419529 5% -1.294608 -2.020044 10% -1.125189 -1.892074 Obs 1,574 25% -.7863727 -1.887247 Sum of Wgt. 1,574 50% -.1844139 Mean .0002416 Largest Std. Dev. 1.001088 75% .5987997 2.993859 90% 1.444973 3.096685 Variance 1.002178 95% 1.923266 3.124635 Skewness .702245 99% 2.791735 3.283912 Kurtosis 2.996532 . . *influential analysis . summ lev dfit cook Variable | Obs Mean Std. Dev. Min Max -------------+--------------------------------------------------------- lev | 1,574 .0063532 .0083435 .0016636 .0642237 dfit | 1,574 .0004226 .0820973 -.3458335 .4470496 cook | 1,574 .0006727 .0015781 1.89e-10 .0199095 . count if lev>=.01905972 /* many (n=108) high leverage values */ 108 . gsort -lev . browse cow wpc wpc_sqrt fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres lev > in 1/10 /* most extreme leverages */ . * large Cook's D values . summ cook dfit Variable | Obs Mean Std. Dev. Min Max -------------+--------------------------------------------------------- cook | 1,574 .0006727 .0015781 1.89e-10 .0199095 dfit | 1,574 .0004226 .0820973 -.3458335 .4470496 . count if cook>=.0025413 /* many (n=90) high Cook's D values */ 90 . gsort -cook . browse cow wpc wpc_sqrt fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres delres coo > k dfit in 1/10 . count if abs(dfit)>=.15941443 /* many (n=92) high DFITS values */ 91 . sort dfit . browse wpc wpc_sqrt fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev cook dfit > in 1/10 /* most extreme negative DFITS values */ . browse wpc wpc_sqrt fit aut_calv herd_size parity1 twin dyst rp vag_disch stdres lev cook dfit > in -10/-1 /* most extreme positive DFITS values */ . * need to estimate dbetas and analyze results . * re-fit a model without influential observations . * for instance model with obs -.2> dfit <.2 . count if abs(dfit)>.2 50 . reg wpc_sqrt hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst rp##vag_disch if abs(dfit)<=.2 Source | SS df MS Number of obs = 1,524 -------------+---------------------------------- F(9, 1514) = 20.25 Model | 1277.66632 9 141.962924 Prob > F = 0.0000 Residual | 10613.0413 1,514 7.00993482 R-squared = 0.1075 -------------+---------------------------------- Adj R-squared = 0.1021 Total | 11890.7076 1,523 7.80742458 Root MSE = 2.6476 ------------------------------------------------------------------------------ wpc_sqrt | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | 1.328323 .1158729 11.46 0.000 1.101035 1.555612 hs100_ctr_sq | .6242158 .1674253 3.73 0.000 .2958057 .9526258 parity1 | .0348987 .0462384 0.75 0.451 -.0557993 .1255967 aut_calv | -.549195 .1368965 -4.01 0.000 -.8177219 -.280668 twin | 1.612204 .65713 2.45 0.014 .3232225 2.901186 dyst | .0347101 .3286393 0.11 0.916 -.6099264 .6793466 | rp | yes | .2639299 .2649223 1.00 0.319 -.2557236 .7835835 | vag_disch | yes | -.411556 .4399945 -0.94 0.350 -1.274619 .4515073 | rp#vag_disch | yes#yes | 2.315228 .7441607 3.11 0.002 .8555325 3.774923 | _cons | 7.553243 .1414882 53.38 0.000 7.27571 7.830777 ------------------------------------------------------------------------------ . . estimate store sqrt_dfit02 . estimate table sqrt sqrt_dfit02, star(0.05 0.01 0.001) ---------------------------------------------- Variable | sqrt sqrt_dfit02 -------------+-------------------------------- hs100_ctr | 1.2301679*** 1.3283233*** hs100_ctr_sq | .70855564*** .62421576*** parity1 | .05830397 .03489872 aut_calv | -.51365293*** -.54919497*** twin | 1.3854529* 1.612204* dyst | .54228284 .03471012 | rp | yes | .38945957 .26392994 | vag_disch | yes | -.01328393 -.41155604 | rp#vag_disch | yes#yes | 1.4910191* 2.3152275** | _cons | 7.5166527*** 7.5532434*** ---------------------------------------------- legend: * p<.05; ** p<.01; *** p<.001 . . **obtain predictions - don't worry about this know - will be explained later . reg wpc_sqrt hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 16.19 Model | 1133.93223 9 125.99247 Prob > F = 0.0000 Residual | 12172.3346 1,564 7.78282264 R-squared = 0.0852 -------------+---------------------------------- Adj R-squared = 0.0800 Total | 13306.2668 1,573 8.45916519 Root MSE = 2.7898 ------------------------------------------------------------------------------ wpc_sqrt | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | 1.230168 .120986 10.17 0.000 .992856 1.46748 hs100_ctr_sq | .7085556 .1739879 4.07 0.000 .3672814 1.04983 parity1 | .058304 .0480002 1.21 0.225 -.0358476 .1524556 aut_calv | -.5136529 .1419214 -3.62 0.000 -.7920293 -.2352766 twin | 1.385453 .5505819 2.52 0.012 .3054964 2.465409 dyst | .5422828 .3054896 1.78 0.076 -.0569296 1.141495 | rp | yes | .3894596 .2691054 1.45 0.148 -.1383858 .917305 | vag_disch | yes | -.0132839 .4004945 -0.03 0.974 -.7988467 .7722788 | rp#vag_disch | yes#yes | 1.491019 .6999486 2.13 0.033 .1180826 2.863956 | _cons | 7.516653 .1473104 51.03 0.000 7.227706 7.805599 ------------------------------------------------------------------------------ . qui margins , over(rp vag_disch dyst) /// > at(parity=2 aut_calv=0 hs100_ctr=0 hs100_ctr_sq=0 twin=0) expression(predict(xb)^2) . matrix define sqrt_wpc = r(table)' . capture drop sqrt_wpc* . svmat sqrt_wpc . drop sqrt_wpc2 - sqrt_wpc9 . *se predictions . capture drop var se . capture drop loci* upci* sqrt_wpc* . matrix define var=vecdiag(e(V))' . svmat var . gen se=sqrt(var) (1,559 missing values generated) . scalar tstar=invttail(1564,.025) . gen loci_sqrt=sqrt(sqrt_wpc1)-tstar*se (1,566 missing values generated) . gen upci_sqrt=sqrt(sqrt_wpc1)+tstar*se (1,566 missing values generated) . gen sqrtwpc_l=loci_sqrt^2 (1,566 missing values generated) . gen sqrtwpc_u=upci_sqrt^2 (1,566 missing values generated) . . *predictions for ln model . gen wpc_ln=ln(wpc) . regress wpc_ln hs100_ctr hs100_ctr_sq parity1 aut_calv twin dyst rp##vag_disch Source | SS df MS Number of obs = 1,574 -------------+---------------------------------- F(9, 1564) = 18.05 Model | 86.9474063 9 9.66082293 Prob > F = 0.0000 Residual | 836.874538 1,564 .535086022 R-squared = 0.0941 -------------+---------------------------------- Adj R-squared = 0.0889 Total | 923.821945 1,573 .587299393 Root MSE = .7315 ------------------------------------------------------------------------------ wpc_ln | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs100_ctr | .343658 .0317233 10.83 0.000 .2814333 .4058827 hs100_ctr_sq | .2118728 .0456207 4.64 0.000 .1223885 .301357 parity1 | .0129844 .012586 1.03 0.302 -.0117028 .0376715 aut_calv | -.1371621 .0372127 -3.69 0.000 -.2101542 -.0641701 twin | .3927426 .1443661 2.72 0.007 .1095711 .6759141 dyst | .1109405 .0801013 1.39 0.166 -.0461768 .2680578 | rp | yes | .1123166 .0705612 1.59 0.112 -.0260878 .250721 | vag_disch | yes | -.0260135 .1050122 -0.25 0.804 -.231993 .1799661 | rp#vag_disch | yes#yes | .4136999 .183531 2.25 0.024 .0537072 .7736926 | _cons | 3.888587 .0386257 100.67 0.000 3.812823 3.96435 ------------------------------------------------------------------------------ . qui margins , over(rp vag_disch dyst) at(parity=2 aut_calv=0 hs100_ctr=0 hs100_ctr_sq=0 twin=0) > expression(exp(predict(xb))) . matrix define ln_wpc = r(table)' . capture drop ln_wpc* . svmat ln_wpc . drop ln_wpc2 - ln_wpc9 . *se predictions . capture drop varln seln . capture drop ln_loci ln_upci ln_wpc* . matrix define varln=vecdiag(e(V))' . svmat varln . gen seln=sqrt(varln) (1,559 missing values generated) . scalar tstar=invttail(1564,.025) . gen ln_loci_sqrt=ln(ln_wpc1)-tstar*seln (1,566 missing values generated) . gen ln_upci_sqrt=ln(ln_wpc1)+tstar*seln (1,566 missing values generated) . gen ln_wpc_l=exp(ln_loci) (1,566 missing values generated) . gen ln_wpc_u=exp(ln_upci) (1,566 missing values generated) . *format numeric values to 3 decimal places . foreach var in sqrt_wpc1 sqrtwpc_l sqrtwpc_u ln_wpc1 ln_wpc_l ln_wpc_u{ 2. format `var' %5.3f 3. } . list sqrt_wpc1 sqrtwpc_l sqrtwpc_u ln_wpc1 ln_wpc_l ln_wpc_u in 1/8, clean header noobs sqrt_w~1 sqrtwp~l sqrtwp~u ln_wpc1 ln_wpc_l ln_wpc_u 58.267 54.700 61.946 50.127 47.103 53.345 66.840 61.376 72.536 56.008 51.214 61.251 58.064 56.638 59.508 48.840 47.649 50.060 66.622 62.156 71.244 54.570 50.729 58.702 64.364 48.202 82.859 56.085 42.254 74.444 73.359 63.454 83.983 62.666 53.554 73.327 90.259 90.259 90.259 82.645 82.645 82.645 100.857 90.533 111.737 92.342 80.406 106.049 . log close name: log: C:\vhm812-data\2bl_reg_dx.txt log type: text closed on: 14 Jan 2016, 09:08:59 -------------------------------------------------------------------------------------------------