Appendix
Denote \({\mathcal {B}}=\{\beta \in {\mathbf {R}}^{p}:\Vert \beta \Vert =1\), and the first non-zero component of \(\beta \) is positive\(\}\), then \(\beta _{0}\) is an interior point of the compact set \({\mathcal {B}}\). Meanwhile, the assumptions \(\Vert {\hat{\beta }}-\beta _{0}\Vert =O_{P}(T^{-1/2})\) and \(\Vert {\hat{\theta }}-\theta _{0}\Vert =O_{P}(T^{-1/2})\) are required for the following proofs as in [6, 20].
Proof of Theorem A.1
As \(\Vert {\hat{\beta }}-\beta _{0}\Vert =O_{P}(T^{-1/2})\), it is easy to get that \({\hat{g}}(w;{\hat{\beta }},{\hat{\theta }})-{\hat{g}}(w;\beta _{0},{\hat{\theta }})=O_{P}(T^{-1/2})\) by Lagrange mean-value theorem. Note that
$$\begin{aligned}&\sqrt{Th}[{\hat{g}}(w;{\hat{\beta }},{\hat{\theta }})-g_{0}(w)]\nonumber \\&\quad = \sqrt{Th}[{\hat{g}}(w;{\hat{\beta }},{\hat{\theta }})- {\hat{g}}(w;\beta _{0},{\hat{\theta }})+{\hat{g}}(w;\beta _{0},{\hat{\theta }})-g_{0}(w)] \nonumber \\&\quad = \sqrt{Th}[{\hat{g}}(w;{\hat{\beta }},{\hat{\theta }})-{\hat{g}}(w;\beta _{0},{\hat{\theta }})]+\sqrt{Th}[{\hat{g}}(w;\beta _{0},{\hat{\theta }})-g_{0}(w)], \end{aligned}$$
(A.1)
then we only need to illustrate the asymptotic property of \({\hat{g}}(w;\beta _{0},{\hat{\theta }})\). Minimize
$$\begin{aligned} \sum _{t=1}^{T}\{Y_{t}-[a+b(\beta _{0} ^\top X_{t}-w)]Y_{t-1}-{\hat{\theta }} ^\top Z_{t}\}^{2}K_{h}(\beta _{0} ^\top X_{t}-w), \end{aligned}$$
with respect to (a, b), and \(({\hat{g}}(w;\beta _{0},{\hat{\theta }}),{\hat{g}}'(w;\beta _{0},{\hat{\theta }}))\) can be obtained, whose form is similar to that of (2.2) with \((\beta ,\theta )\) replaced by \((\beta _{0},{\hat{\theta }})\).
According to Doukhan et al. [9], a strongly mixing stationary sequence is an ergodic sequence, so that \(\{Y_t,t\ge 1\}\) is a strictly stationary and ergodic sequence. Since a strictly stationary sequence has constant mean and constant variance, the second moment of \(Y_{t-1}\) is a constant, denoted as \(\gamma =E(Y_{t-1}^{2})\).
Based on the Lemma 1 in [29], we can get that, for each \(j=0,1,2,3\),
$$\begin{aligned}&\frac{1}{T}\sum _{t=1}^{T}\left\{ Y_{t-1}^{2}\left( \frac{\beta _{0}^\top X_{t}-w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w) \right. \\&\qquad \left. -E\left[ Y_{t-1}^{2}\left( \frac{\beta _{0}^\top X_{t}-w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w)\right] \right\} \\&\quad =O\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}\right) , \mathrm{a.s.}, \end{aligned}$$
uniformly for \(w \in {\mathcal {W}}\), where,
$$\begin{aligned}&E\left[ Y_{t-1}^{2}\left( \frac{\beta _{0}^\top X_{t}-w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w)\right] \\&\quad = E(Y_{t-1}^{2}) E\left[ \left( \frac{\beta _{0}^\top X_{t}-w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w)\right] \\&\quad = \gamma f_{0}(w)\mu _{j}+O(h), \end{aligned}$$
i.e.,
$$\begin{aligned} R_{T,j}(w;\beta _{0},{\hat{\theta }}) = \gamma f_{0}(w)\mu _{j}+O\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}+h\right) ,\ \mathrm{a.s.} \end{aligned}$$
(A.2)
So, it follows immediately that
$$\begin{aligned} R_{T}(w;\beta _{0},{\hat{\theta }})=R(w)+O\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}+h\right) ,\ \mathrm{a.s.}, \end{aligned}$$
where \(R(w)=\gamma f_{0}(w)\mathrm{diag}\{1,\mu _{2}\}\). Using the fact that
$$\begin{aligned} (A+hB)^{-1}=A^{-1}-hA^{-1}BA^{-1}+O(h^{2}), \end{aligned}$$
we have
$$\begin{aligned} R_{T}^{-1}(w;\beta _{0},{\hat{\theta }}) = R^{-1}(w)+O\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}+h\right) ,\ \mathrm{a.s.}, \end{aligned}$$
(A.3)
uniformly for \(w \in {\mathcal {W}}\). Let
$$\begin{aligned}&\eta _{T,j}^{*}(w;\beta _{0},{\hat{\theta }})\\&\quad =\frac{1}{T}\sum _{t=1}^{T} [Y_{t}-{\hat{\theta }}^\top Z_{t} -g_{0}(\beta _{0}^\top X_{t})Y_{t-1}]Y_{t-1}\left( \frac{\beta _{0}^\top X_{t}-w}{h}\right) ^{j}\\&\qquad \times K_{h}(\beta _{0}^\top X_{t}-w),\quad j=0,1, \end{aligned}$$
and
$$\begin{aligned} \eta _{T}^{*}(w;\beta _{0},{\hat{\theta }})=\begin{pmatrix} \eta _{T,0}^{*}(w;\beta _{0},{\hat{\theta }})\\ \eta _{T,1}^{*}(w;\beta _{0},{\hat{\theta }}) \end{pmatrix}. \end{aligned}$$
As
$$\begin{aligned}&E\left\{ [Y_{t}-{\hat{\theta }}^\top Z_{t}-g_{0}(\beta _{0}^\top X_{t})Y_{t-1}]Y_{t-1}\left( \frac{\beta _{0}^\top X_{t} -w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w)\right\} \\&\quad = E\left\{ [Y_{t}-\theta _{0}^\top Z_{t}+\theta _{0}^\top Z_{t}-{\hat{\theta }}^\top Z_{t}-g_{0}(\beta _{0}^\top X_{t})Y_{t-1}]Y_{t-1} \right. \\&\qquad \left. \left( \frac{\beta _{0}^\top X_{t} -w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w)\right\} \\&\quad = E\left[ \varepsilon _{t} Y_{t-1}\left( \frac{\beta _{0}^\top X_{t} -w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w)\right] \\&\qquad +E\left[ (\theta _{0}-{\hat{\theta }})^\top Z_{t}Y_{t-1} \left( \frac{\beta _{0}^\top X_{t}-w}{h}\right) ^{j}K_{h}(\beta _{0}^\top X_{t}-w)\right] \\&\quad = O(T^{-1/2}), \end{aligned}$$
by Lemma 1 in [29] and arguments similar to those in the previous proof, we can show that
$$\begin{aligned} \eta _{T,j}^{*}(w;\beta _{0},{\hat{\theta }})=O\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}+T^{-1/2}\right) ,\ \mathrm{a.s.}, \end{aligned}$$
uniformly for \(w \in {\mathcal {W}}\). Using Taylor's expansion for \(g_{0}(\beta _{0}^\top X_{t})\) at \(w\), we obtain
$$\begin{aligned}&\eta _{T,j}(w;\beta _{0},{\hat{\theta }})-\eta _{T,j}^{*}(w;\beta _{0},{\hat{\theta }})\\&\quad = R_{T,j}(w;\beta _{0},{\hat{\theta }})g_{0}(w)+hR_{T,j+1}(w;\beta _{0},{\hat{\theta }})g_{0}'(w)\\&\qquad +\frac{1}{2}h^{2}R_{T,j+2}(w;\beta _{0},{\hat{\theta }})g_{0}''(w)+o(h^{2}),\ \mathrm{a.s.}, \end{aligned}$$
so that
$$\begin{aligned}&\eta _{T}(w;\beta _{0},{\hat{\theta }})-\eta _{T}^{*}(w;\beta _{0},{\hat{\theta }}) = R_{T}(w;\beta _{0},{\hat{\theta }})\begin{pmatrix} g_{0}(w) \\ hg_{0}'(w) \end{pmatrix}\nonumber \\&\qquad +\frac{1}{2}h^{2}g_{0}''(w)\begin{pmatrix} R_{T,2}(w;\beta _{0},{\hat{\theta }}) \\ R_{T,3}(w;\beta _{0},{\hat{\theta }}) \end{pmatrix} +o(h^{2}),\ \mathrm{a.s.} \end{aligned}$$
(A.4)
Thus, from the formulae (A.2), (A.3) and (A.4) above, it follows that
$$\begin{aligned} \begin{pmatrix} {\hat{g}}(w;\beta _{0},{\hat{\theta }})-g_{0}(w) \\ h[{\hat{g}}'(w;\beta _{0},{\hat{\theta }})-g_{0}'(w)] \end{pmatrix}= & {} R^{-1}(w)\eta _{T}^{*}(w;\beta _{0},{\hat{\theta }})\\&+\frac{1}{2}h^{2}g_{0}''(w)\begin{pmatrix} \mu _{2} \\ \frac{\mu _{3}}{\mu _{2}} \end{pmatrix} +o(T^{-1/2}+h^{2}),\ \mathrm{a.s.} \end{aligned}$$
Clearly,
$$\begin{aligned} {\hat{g}}(w;\beta _{0},{\hat{\theta }})-g_{0}(w)= & {} \gamma ^{-1}f_{0}^{-1}(w)\eta _{T,0}^{*}(w;\beta _{0},{\hat{\theta }})\\&+\frac{1}{2}h^{2}g_{0}''(w)\mu _{2}+o_{P}(T^{-1/2}+h^{2}), \end{aligned}$$
uniformly for \(w \in {\mathcal {W}}\).
By simple calculation, we have
$$\begin{aligned}&\sqrt{Th}\eta _{T,0}^{*}(w;\beta _{0},{\hat{\theta }}) \\&\quad = \sqrt{Th} \frac{1}{T}\sum _{t=1}^{T}[Y_{t}-{\hat{\theta }}^\top Z_{t} -g_{0}(\beta _{0}^\top X_{t})Y_{t-1}]Y_{t-1}K_{h}(\beta _{0}^\top X_{t}-w) \\&\quad = \sqrt{Th}\left\{ \frac{1}{T}\sum _{t=1}^{T}[Y_{t}-\theta _{0}^\top Z_{t} -g_{0}(\beta _{0}^\top X_{t})Y_{t-1}]Y_{t-1}K_{h}(\beta _{0}^\top X_{t}-w)\right. \\&\qquad \left. +O_{P}(T^{-1/2})\right\} \\&\quad = \sqrt{Th}[\frac{1}{T}\sum _{t=1}^{T}\varepsilon _{t}Y_{t-1}K_{h}(\beta _{0}^\top X_{t}-w)]+o_{P}(1)\\&\quad = \frac{1}{\sqrt{T}}\sum _{t=1}^{T}\{\sqrt{h}\varepsilon _{t}Y_{t-1}K_{h}(\beta _{0}^\top X_{t}-w) -\sqrt{h}E[\varepsilon _{t}Y_{t-1}K_{h}(\beta _{0}^\top X_{t}-w)]\}\\&\qquad +o_{P}(1). \end{aligned}$$
By Slutsky’s theorem and Theorem 4.4 of [21], it can be obtained immediately that
$$\begin{aligned} \sqrt{Th}\eta _{T,0}^{*}(w;\beta _{0},{\hat{\theta }}) \overset{L}{\rightarrow }N(0,\gamma \nu _{0}\sigma ^{2}f_{0}(w)). \end{aligned}$$
Consequently, we can derive
$$\begin{aligned} \sqrt{Th}\left[ {\hat{g}}(w;\beta _{0},{\hat{\theta }})-g_{0}(w)-\frac{1}{2}h^{2}g_{0}''(w)\mu _{2}\right] \overset{L}{\rightarrow }N(0,\gamma ^{-1}\nu _{0}\sigma ^{2}f_{0}^{-1}(w)), \end{aligned}$$
and hence, from (A.1),
$$\begin{aligned} \sqrt{Th}\left[ {\hat{g}}(w;{\hat{\beta }},{\hat{\theta }})-g_{0}(w)-\frac{1}{2}h^{2}g_{0}''(w)\mu _{2}\right] \overset{L}{\rightarrow }N(0,\gamma ^{-1}\nu _{0}\sigma ^{2}f_{0}^{-1}(w)). \end{aligned}$$
The proof is complete. \(\square \)
Proof of Theorem A.2
By (2.3), with \(\lambda \) as the Lagrange multiplier, it is easy to see that \(({\hat{\beta }},{\hat{\theta }})\) is the solution to
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}[Y_{t}-{\hat{g}}({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})Y_{t-1}\nonumber \\&\qquad -{\hat{\theta }}^\top Z_{t}] \begin{pmatrix} {\hat{g}}'({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})Y_{t-1}X_{t}\\ Z_{t} \end{pmatrix} = 0, \end{aligned}$$
(A.5)
which can be rewritten as
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t} \begin{pmatrix} {\hat{g}}'({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})Y_{t-1}X_{t}\\ Z_{t} \end{pmatrix}\\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T}[({\hat{\theta }}-\theta _{0})^\top Z_{t}] \begin{pmatrix} {\hat{g}}'({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})Y_{t-1}X_{t}\\ Z_{t} \end{pmatrix}\\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} [{\hat{g}}({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})]Y_{t-1}\\&\qquad \begin{pmatrix} {\hat{g}}'({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})Y_{t-1}X_{t}\\ Z_{t} \end{pmatrix} = 0. \end{aligned}$$
Through direct calculation, we find that
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}A_{t} -\frac{1}{\sqrt{T}}\sum _{t=1}^{T}[({\hat{\theta }}-\theta _{0})^\top Z_{t}]A_{t}\\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} [{\hat{g}}({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})]Y_{t-1} A_{t} \\&\qquad +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}B_{t} -\frac{1}{\sqrt{T}}\sum _{t=1}^{T}[({\hat{\theta }}-\theta _{0})^\top Z_{t}]B_{t} \\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} [{\hat{g}}({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})]Y_{t-1}B_{t} = 0, \end{aligned}$$
where,
$$\begin{aligned} A_{t}= & {} \begin{pmatrix} g_{0}'(\beta _{0}^\top X_{t})Y_{t-1}X_{t}\\ Z_{t} \end{pmatrix},~~~B_{t}=\begin{pmatrix} [{\hat{g}}'({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }}) -g_{0}'(\beta _{0}^\top X_{t})]Y_{t-1}X_{t}\\ Z_{t} \end{pmatrix}. \end{aligned}$$
So, we have the equation
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}A_{t} -\frac{1}{\sqrt{T}}\sum _{t=1}^{T}[({\hat{\theta }}-\theta _{0})^\top Z_{t}]A_{t} \nonumber \\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} [{\hat{g}}({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})]Y_{t-1} A_{t} \nonumber \\&\qquad +o_{P}(1)= 0. \end{aligned}$$
(A.6)
Obviously,
$$\begin{aligned}&{\hat{g}}({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})\nonumber \\&\quad = {\hat{g}}({\hat{\beta }}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }}) +{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }}) -g_{0}(\beta _{0}^\top X_{t})\nonumber \\&\quad = {\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})+{\hat{g}}'(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }}) ({\hat{\beta }}-\beta _{0})^\top X_{t}+o_{P}(({\hat{\beta }}-\beta _{0})^\top X_{t})\nonumber \\&\qquad -{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})+{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }}) -g_{0}(\beta _{0}^\top X_{t}) \nonumber \\&\quad = {\hat{g}}'(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})({\hat{\beta }}-\beta _{0})^\top X_{t} +{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})+o_{P}(T^{-1/2})\nonumber \\&\quad = g_{0}'(\beta _{0}^\top X_{t})({\hat{\beta }}-\beta _{0})^\top X_{t} +{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})\nonumber \\&\qquad +[{\hat{g}}'(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}'(\beta _{0}^\top X_{t})]({\hat{\beta }} -\beta _{0})^\top X_{t}+o_{P}(T^{-1/2})\nonumber \\&\quad = g_{0}'(\beta _{0}^\top X_{t})({\hat{\beta }}-\beta _{0})^\top X_{t} +{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})+o_{P}(T^{-1/2}).\nonumber \\ \end{aligned}$$
(A.7)
Substituting (A.7) into (A.6), we can obtain
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}A_{t} -\frac{1}{\sqrt{T}}\sum _{t=1}^{T}[({\hat{\theta }}-\theta _{0})^\top Z_{t}]A_{t}\\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} g_{0}'(\beta _{0}^\top X_{t})({\hat{\beta }}-\beta _{0})^\top X_{t}Y_{t-1}A_{t}\\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} [{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})]Y_{t-1} A_{t}+o_{P}(1) = 0, \end{aligned}$$
meaning that
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}A_{t}\\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T}A_{t}A_{t}^\top \begin{pmatrix} {\hat{\beta }}-\beta _{0} \\ {\hat{\theta }}-\theta _{0} \end{pmatrix} -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} [{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})]Y_{t-1}A_{t}\\&\qquad +o_{P}(1) = 0. \end{aligned}$$
Using the Ergodic theorem, we have
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}A_{t} -\sqrt{T}A \begin{pmatrix} {\hat{\beta }}-\beta _{0} \\ {\hat{\theta }}-\theta _{0} \end{pmatrix}\nonumber \\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} [{\hat{g}}(\beta _{0}^\top X_{t};{\hat{\beta }},{\hat{\theta }})-g_{0}(\beta _{0}^\top X_{t})]Y_{t-1}A_{t}+o_{P}(1) = 0, \end{aligned}$$
(A.8)
where
$$\begin{aligned} A=E[A_{t}A_{t}^\top ]. \end{aligned}$$
On the other hand, following the estimation procedure, \(({\hat{a}},{\hat{b}})\equiv ({\hat{g}}(w;{\hat{\beta }},{\hat{\theta }}),{\hat{g}}'(w;\hat{\beta },{\hat{\theta }}))\) is the minimizer of
$$\begin{aligned} \sum _{t=1}^{T}\{Y_{t}-[a+b({\hat{\beta }} ^\top X_{t}-w)]Y_{t-1} -{\hat{\theta }} ^\top Z_{t}\}^{2}K_{h}({\hat{\beta }} ^\top X_{t}-w), \end{aligned}$$
then, \(({\hat{a}},{\hat{b}})\) satisfies the formula
$$\begin{aligned}&\frac{1}{T}\sum _{t=1}^{T}\{Y_{t}-[{\hat{a}}+h{\hat{b}}({\hat{\beta }}^\top X_{t}-w)/h]Y_{t-1} -{\hat{\theta }}^\top Z_{t}\}Y_{t-1} \nonumber \\&\qquad \begin{pmatrix} 1 \\ ({\hat{\beta }}^\top X_{t}-w)/h \end{pmatrix} K_{h}({\hat{\beta }}^\top X_{t}-w) = 0, \end{aligned}$$
(A.9)
Via Taylor's expansion and using the conditions on \(h\), we get
$$\begin{aligned}&\frac{1}{T}\sum _{t=1}^{T}\{Y_{t}-[a+b(\beta _{0}^\top X_{t}-w)]Y_{t-1}-\theta _{0}^\top Z_{t}\}Y_{t-1}\\&\qquad \begin{pmatrix} 1 \\ (\beta _{0}^\top X_{t}-w)/h \end{pmatrix} K_{h}(\beta _{0}^\top X_{t}-w)\\&\qquad -B_{T1}\begin{pmatrix} {\hat{a}}-a \\ h({\hat{b}}-b) \end{pmatrix} -B_{T2}({\hat{\beta }}-\beta _{0})-B_{T3}({\hat{\theta }}-\theta _{0})\\&\qquad +o_{P}(T^{-1/2})+O_{P}(h^{2}) = 0, \end{aligned}$$
where
$$\begin{aligned} B_{T1}= & {} \frac{1}{T}\sum _{t=1}^{T}Y_{t-1}^{2}K_{h}(\beta _{0}^\top X_{t}-w)\begin{pmatrix} 1 &{} \frac{\beta _{0}^\top X_{t}-w}{h} \\ \frac{\beta _{0}^\top X_{t}-w}{h} &{} (\frac{\beta _{0}^\top X_{t}-w}{h})^{2} \end{pmatrix},\\ B_{T2}= & {} \frac{1}{T}\sum _{t=1}^{T}g_{0}'(w)Y_{t-1}^{2}K_{h}(\beta _{0}^\top X_{t}-w)\begin{pmatrix} X_{t}^\top \\ X_{t}^\top \frac{\beta _{0}^\top X_{t}-w}{h} \end{pmatrix} \end{aligned}$$
and
$$\begin{aligned} B_{T3}=\frac{1}{T}\sum _{t=1}^{T}Y_{t-1}K_{h}(\beta _{0}^\top X_{t}-w)\begin{pmatrix} Z_{t}^\top \\ Z_{t}^\top \frac{\beta _{0}^\top X_{t}-w}{h} \end{pmatrix}. \end{aligned}$$
Similarly, from Lemma 1 in [29], we provide the asymptotic counterparts of \(B_{Tj}\ (j=1,2,3)\) as follows:
$$\begin{aligned} B_{T1}= & {} \gamma f_{0}(w)\mathrm{diag}\{1,\mu _{2}\}+O_{P}\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}+h\right) ,\\ B_{T2}= & {} g_{0}'(w) \left[ \begin{pmatrix} \gamma f_{0}(w)E(X_{t}^\top |\beta _{0}^\top X_{t}=w) \\ 0 \end{pmatrix} +O_{P}\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}+h\right) \right] \end{aligned}$$
and
$$\begin{aligned} B_{T3}=\begin{pmatrix} E(Y_{t-1})f_{0}(w)E(Z_{t}^\top | \beta _{0}^\top X_{t}=w)\\ 0 \end{pmatrix} +O_{P}\left( \left\{ \frac{\log T}{Th}\right\} ^{1/2}+h\right) . \end{aligned}$$
Thus,
$$\begin{aligned} \begin{pmatrix} {\hat{a}}-a \\ h({\hat{b}}-b) \end{pmatrix}= & {} \frac{1}{T}\sum _{t=1}^{T}\varepsilon _{t}Y_{t-1}\gamma ^{-1}f_{0}^{-1}(w) \begin{pmatrix} 1 \\ \frac{\beta _{0}^\top X_{t}-w}{h\mu _{2}} \end{pmatrix} K_{h}(\beta _{0}^\top X_{t}-w)\\&-\gamma ^{-1}f_{0}^{-1}(w) \begin{pmatrix} g_{0}'(w)E(X_{t}^\top |\beta _{0}^\top X_{t}=w)\gamma f_{0}(w) \\ 0 \end{pmatrix} ({\hat{\beta }}-\beta _{0})\\&-\gamma ^{-1}f_{0}^{-1}(w) \begin{pmatrix} E(Z_{t}^\top |\beta _{0}^\top X_{t}=w)E(Y_{t-1}) f_{0}(w) \\ 0 \end{pmatrix} ({\hat{\theta }}-\theta _{0})\\&+o_{P}(T^{-1/2}). \end{aligned}$$
Then, it can be shown that
$$\begin{aligned} {\hat{g}}(w;{\hat{\beta }},{\hat{\theta }})-g_{0}(w)= & {} \frac{1}{T}\sum _{t=1}^{T}\gamma ^{-1}f_{0}^{-1}(w) Y_{t-1}K_{h}(\beta _{0}^\top X_{t}-w)\varepsilon _{t}\nonumber \\&-g_{0}'(w)({\hat{\beta }}-\beta _{0})^\top E(X_{t}|\beta _{0}^\top X_{t}=w)\nonumber \\&-\gamma ^{-1}E(Y_{t-1})({\hat{\theta }}-\theta _{0})^\top E(Z_{t}|\beta _{0}^\top X_{t}=w)\nonumber \\&+o_{P}(T^{-1/2}). \end{aligned}$$
(A.10)
Substituting (A.10) into (A.8) and applying the Ergodic theorem at the same time, we get
$$\begin{aligned}&\lambda \begin{pmatrix} {\hat{\beta }} \\ 0 \end{pmatrix} +\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}A_{t} -\sqrt{T}B \begin{pmatrix} {\hat{\beta }}-\beta _{0} \\ {\hat{\theta }}-\theta _{0} \end{pmatrix}\nonumber \\&\qquad -\frac{1}{\sqrt{T}}\sum _{t=1}^{T} \frac{1}{T}\sum _{l=1}^{T}[\gamma ^{-1}f_{0}^{-1}(\beta _{0}^\top X_{t})Y_{l-1} Y_{t-1}K_{h}(\beta _{0}^\top X_{l}-\beta _{0}^\top X_{t})A_{t}\varepsilon _{l}]\nonumber \\&\qquad +o_{P}(1) = 0, \end{aligned}$$
(A.11)
where
$$\begin{aligned} B=E[A_{t}A_{t}^\top ]-E\left[ A_{t} \begin{pmatrix} g_{0}'(\beta _{0}^\top X_{t})Y_{t-1}E(X_{t}|\beta _{0}^\top X_{t})\\ \gamma ^{-1}Y_{t-1}E(Y_{t-1})E(Z_{t}|\beta _{0}^\top X_{t}) \end{pmatrix}^\top \right] . \end{aligned}$$
Handling the fourth term in (A.11) by interchanging the summations, we have
$$\begin{aligned} -\frac{1}{\sqrt{T}}\sum _{l=1}^{T}\varepsilon _{l}Y_{l-1}\frac{1}{T}\sum _{t=1}^{T} [\gamma ^{-1}f_{0}^{-1}(\beta _{0}^\top X_{t})Y_{t-1}K_{h}(\beta _{0}^\top X_{l}-\beta _{0}^\top X_{t})A_{t}]. \end{aligned}$$
Furthermore, by the Ergodic theorem, the term is equivalent asymptotically to
$$\begin{aligned} -\frac{1}{\sqrt{T}}\sum _{l=1}^{T}\varepsilon _{l}Y_{l-1}E[ \gamma ^{-1}f_{0}^{-1}(\beta _{0}^\top X_{t})Y_{t-1}K_{h}(\beta _{0}^\top X_{l}-\beta _{0}^\top X_{t})A_{t}], \end{aligned}$$
i.e.,
$$\begin{aligned} -\frac{1}{\sqrt{T}}\sum _{l=1}^{T}\varepsilon _{l}Y_{l-1} \begin{pmatrix} g_{0}'(\beta _{0}^\top X_{l})E(X_{l}|\beta _{0}^\top X_{l}) \\ \gamma ^{-1}E(Y_{t-1})E(Z_{l}|\beta _{0}^\top X_{l}) \end{pmatrix}. \end{aligned}$$
(A.12)
For convenience, let
$$\begin{aligned} P_{\beta }=\begin{pmatrix} I-\beta _{0}\beta _{0}^\top &{} 0 \\ 0 &{} I \end{pmatrix}. \end{aligned}$$
Combining (A.11) and (A.12), and multiplying by \(P_{\beta }\), we obtain
$$\begin{aligned}&P_{\beta }B\sqrt{T}\begin{pmatrix} {\hat{\beta }}-\beta _{0} \\ {\hat{\theta }}-\theta _{0} \end{pmatrix} \nonumber \\&\quad =\frac{1}{\sqrt{T}}\sum _{t=1}^{T}\varepsilon _{t}P_{\beta } \begin{pmatrix} g_{0}'(\beta _{0}^\top X_{t})Y_{t-1}[X_{t}-E(X_{t}|\beta _{0}^\top X_{t})] \\ Z_{t}-\gamma ^{-1}E(Y_{t-1})Y_{t-1}E(Z_{t}|\beta _{0}^\top X_{t}) \end{pmatrix} +o_{P}(1). \end{aligned}$$
(A.13)
By Slutsky’s theorem and Theorem 4 of [9], we verify that
$$\begin{aligned} \sqrt{T}\begin{pmatrix} {\hat{\beta }}-\beta _{0} \\ {\hat{\theta }}-\theta _{0} \end{pmatrix} \overset{L}{\rightarrow }N(0,\sigma ^{2}B^{-1}V(B^{-1}){^\top }), \end{aligned}$$
where
$$\begin{aligned} V=\begin{pmatrix} V_{11} &{} V_{12} \\ V_{21} &{} V_{22} \end{pmatrix} \end{aligned}$$
with
$$\begin{aligned} V_{11}= & {} \gamma E\{[g_{0}'(\beta _{0}^\top X_{t})]^{2}[X_{t}-E(X_{t}|\beta _{0}^\top X_{t})][X_{t} -E(X_{t}|\beta _{0}^\top X_{t})]^\top \},\\ V_{12}= & {} E(Y_{t-1})E\{g_{0}'(\beta _{0}^\top X_{t})[X_{t}-E(X_{t}|\beta _{0}^\top X_{t})][Z_{t}-E(Z_{t}|\beta _{0}^\top X_{t})]^\top \}, \\ V_{21}= & {} E(Y_{t-1})E\{g_{0}'(\beta _{0}^\top X_{t})[Z_{t}-E(Z_{t}|\beta _{0}^\top X_{t})][X_{t}-E(X_{t}|\beta _{0}^\top X_{t})]^\top \},\\ V_{22}= & {} \{1-\gamma ^{-1}[E(Y_{t-1})]^{2}\}E(Z_{t}Z_{t}^\top )+\gamma ^{-1}[E(Y_{t-1})]^{2} E\{[Z_{t}\\&-E(Z_{t}|\beta _{0}^\top X_{t})][Z_{t} -E(Z_{t} |\beta _{0}^\top X_{t})]^\top \}. \end{aligned}$$
This completes the proof of Theorem A.2. \(\square \)