\documentclass[11pt]{article}

\evensidemargin 0cm
\oddsidemargin 0cm

\topmargin -.94cm
\textwidth 16.0cm
\textheight 24.6cm

\usepackage[dvips]{epsfig}
%\usepackage{nips}
\usepackage{wrapfig}
\usepackage{subfigure}
\usepackage{graphics}
\usepackage{url}
\usepackage{natbib}
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{rotate}
\usepackage{latexsym}

\usepackage{bbold}
\newcommand{\real}{\mathbb R}
\newcommand{\iden}{\mathbb 1}

\input xy
\xyoption{all}
\xyoption{dvips}
\xyoption{color}
%\input{../xyps-col.tex}

\newcommand{\scalefac}{1}
\newcommand{\comment}[1]{}

%\newcommand{\proof}[1]{\vspace*{5mm} \noindent {{\small \bf Proof.} \small #1}%
%\vspace*{3mm}}
%\newcommand{\example}[2]{\vspace*{3mm} \noindent {\bf Example #1.} #2%
%\vspace*{1mm}}

\newcommand{\onder}[2]{#1_{\mbox{\rm \scriptsize #2}}}
\newcommand{\boven}[2]{#1^{\mbox{\rm \scriptsize #2}}}
\newcommand{\Xmap}{\boven{X}{map}}
\newcommand{\mycite}[1]{(\cite{#1})}
\newcommand{\myciteyear}[1]{(\citeyear{#1})}
\newcommand{\qold}{\boven{q}{old}}
\newcommand{\Pold}{\boven{P}{old}}
\newcommand{\qnew}{\boven{q}{new}}
\newcommand{\Pnew}{\boven{P}{new}}
\newcommand{\mnew}{\boven{\mu}{new}}
\newcommand{\bgamma}{\bar{\gamma}}
\newcommand{\blambda}{\bar{\lambda}}
\newcommand{\hlambda}{\hat{\lambda}}
\newcommand{\hbeta}{\hat{\beta}}
\newcommand{\hy}{\hat{y}}
\newcommand{\he}{\hat{e}}
\newcommand{\hsigma}{\hat{\sigma}}
\newcommand{\halpha}{\hat{\alpha}}
\newcommand{\blnew}{\boven{\bar{\lambda}}{new}}
\newcommand{\tlambda}{\tilde{\lambda}}
\newcommand{\bmu}{\bar{\mu}}
\newcommand{\tmu}{\tilde{\mu}}
\newcommand{\tm}{\tilde{m}}
\newcommand{\ttheta}{\tilde{\theta}}
\newcommand{\hm}{\hat{m}}
\newcommand{\htheta}{\hat{\theta}}
\newcommand{\KL}{\mbox{KL}}
\newcommand{\ev}[1]{\left<#1\right>}
\newcommand{\collapse}{\mathop{\rm Collapse}\limits}
\newcommand{\argmax}{\mathop{\rm argmax}\limits}
\newcommand{\argmin}{\mathop{\rm argmin}\limits}
\newcommand{\cmin}{\mathop{\rm min'}\limits}
\newcommand{\cmax}{\mathop{\rm max'}\limits}
\newcommand{\ninp}{\onder{n}{inp}}
\newcommand{\Tr}{\mbox{Tr~}}
\newcommand{\vc}[1]{{\bf #1}}
\newcommand{\vb}[1]{\mbox{\boldmath $#1$}}
\newcommand{\vs}[1]{\mbox{\boldmath \scriptsize $#1$}}
\newcommand{\sss}[1]{\underline{#1}}
\newcommand{\stheta}{\sss{\theta}}
\newcommand{\vx}{\vc{x}}
\newcommand{\sx}{\sss{x}}
\newcommand{\se}{\sss{e}}
\newcommand{\sy}{\sss{y}}
\newcommand{\sz}{\sss{z}}
\newcommand{\spi}{\sss{\pi}}
\newcommand{\svx}{\sss{\vx}}
\newcommand{\vX}{\vc{X}}
\newcommand{\sX}{\sss{X}}
\newcommand{\bX}{\bar{X}}
\newcommand{\by}{\bar{y}}
\newcommand{\bx}{\bar{x}}
\newcommand{\vy}{\vc{y}}
\newcommand{\vt}{\vc{t}}
\newcommand{\vY}{\vc{Y}}
\newcommand{\vz}{\vc{z}}
\newcommand{\vf}{\vc{f}}
\newcommand{\vg}{\vc{g}}
\newcommand{\vm}{\vc{m}}
\newcommand{\vepsilon}{\vc{\epsilon}}
\newcommand{\vmmin}{\vm^{-}}
\newcommand{\vmnul}{\vm^{0}}
\newcommand{\vmplus}{\vm^{+}}
\newcommand{\vM}{\vc{M}}
\newcommand{\hvm}{\hat{\vc{m}}}
\newcommand{\vtheta}{\mbox{\boldmath $\theta$}}
\newcommand{\vT}{\mbox{\boldmath $\Theta$}}
\newcommand{\vxi}{\mbox{\boldmath $\xi$}}
\newcommand{\eye}{{\mathbb 1}}
\newcommand{\zero}{{\mathbb 0}}
\newcommand{\tP}{\tilde{P}}
\newcommand{\hP}{\hat{P}}
\newcommand{\hp}{p}
%\newcommand{\hp}{\hat{p}}
\newcommand{\pold}{\boven{\hp}{old}}
\newcommand{\hV}{\hat{V}}
\newcommand{\ee}{\mbox{e}}
\newcommand{\evnul}[1]{\ev{#1}_{q_t}}
\newcommand{\evbac}[1]{\ev{#1}_{\hp_t}}
\newcommand{\evfor}[1]{\ev{#1}_{\hp_{t+1}}}
\newcommand{\valpha}{\vb{\alpha}}
\newcommand{\vmu}{\vb{\mu}}
\newcommand{\hmu}{\hat{\mu}}
\newcommand{\vbeta}{\vb{\beta}}
\newcommand{\hvbeta}{\hat{\vbeta}}
\newcommand{\hvtheta}{\hat{\vtheta}}
\newcommand{\vgamma}{\vb{\gamma}}
\newcommand{\vGamma}{\vb{\Gamma}}
\newcommand{\vdelta}{\vb{\delta}}
\newcommand{\vDelta}{\vb{\Delta}}
\newcommand{\vsalpha}{\vs{\alpha}}
\newcommand{\vsbeta}{\vs{\beta}}
\newcommand{\vsgamma}{\vs{\gamma}}
\newcommand{\vsdelta}{\vs{\delta}}
\newcommand{\talpha}{\tilde{\valpha}}
\newcommand{\tbeta}{\tilde{\vbeta}}
\newcommand{\tgamma}{\tilde{\vgamma}}
\newcommand{\tdelta}{\tilde{\vdelta}}
\newcommand{\dalpha}{\partial \alpha}
\newcommand{\dbeta}{\partial \beta}
\newcommand{\dgamma}{\partial \gamma}
\newcommand{\ddelta}{\partial \delta}

\newcommand{\evv}[1]{\ev{\ev{#1}}}
\newcommand{\evvnul}[1]{\evv{#1}_{q_t}}
\newcommand{\evvbac}[1]{\evv{#1}_{\hp_t}}
\newcommand{\evvfor}[1]{\evv{#1}_{\hp_{t+1}}}

\newcommand{\gnew}{\boven{\gamma}{new}}
\newcommand{\lnew}{\boven{\lambda}{new}}
\newcommand{\gold}{\boven{\gamma}{old}}
\newcommand{\lold}{\boven{\lambda}{old}}
\newcommand{\dnew}{\boven{\vdelta}{new}}
\newcommand{\dold}{\boven{\vdelta}{old}}
\newcommand{\anew}{\boven{\valpha}{new}}
\newcommand{\aold}{\boven{\valpha}{old}}
\newcommand{\bnew}{\boven{\vbeta}{new}}
\newcommand{\bold}{\boven{\vbeta}{old}}

\newcommand{\Var}{\mbox{Var}}
\newcommand{\RSS}{\mbox{RSS}}
\newcommand{\lik}{\mbox{lik}}
\newcommand{\TrainError}{\mbox{TE}}
\newcommand{\PredError}{\mbox{PE}}
\newcommand{\ValError}{\mbox{VE}}

\newenvironment{proof}{\begin{quotation} {\it Proof.\ }}
			{$\Box$ \end{quotation}}
\newtheorem{proposition}{Proposition}


\begin{document}

\section*{Angular Distribution in Muon Decay}

The positive muon decays via the parity-violating weak interaction
to produce an energetic positron and two neutrinos: 
\[
\mu^+ \rightarrow e^+ + \nu_e + \bar{\nu}_\mu \: .
\]
The angle $\theta$ at which the positrons are emitted is preferentially
in the direction of the muon spin (polarization) at the instant of decay.

\vspace*{5mm}
\noindent
\hspace*{-1cm}
%\vspace*{5mm}
\begin{minipage}{0.6\textwidth}
\begin{center}
\epsfig{file=plaatje2.eps,width=0.7\textwidth}
\end{center}
\end{minipage}
\hfill
\begin{minipage}{0.4\textwidth}
Schematic illustration of the angular distribution of positrons from
muon decay. The length of the arrow represents the relative probability
of positron emittance in the direction of the arrow. The angle $\theta$
is with respect to the direction of the muon spin indicated by the solid
arrow, i.e., in the direction of the positive $x$-axis.
\end{minipage}
\vspace*{5mm}


More specifically, the probability density of a positron being emitted in
a direction at an angle $\theta$ to the spin is given by
\begin{equation}
f_{\cos \theta}(\cos \theta | \alpha) = {1 + \alpha \cos \theta \over 2}
\: .
\label{density}
\end{equation}
Here $\alpha$ is called the ``asymmetry''. 
%Physical considerations
%dictate that $| \alpha | \le \frac{1}{3}$, but we note that $f_{\cos
%\theta}(\cos \theta | \alpha)$ is a probability density function for
%$| \alpha | \le 1$. 

More information about muon decay and the (experimental) difficulties involved
can be found on {\verb#http://musr.physics.ubc.ca/theses/Morris/node8.html#}
and\\ {\verb#http://phycomp.technion.ac.il/~zaher/proposal/node19.html#}. Here
we will consider an artificial data set, drawn from the
distribution~(\ref{density}).

\begin{description}
\item[a.] Equation~(\ref{density}) gives the probability density function
in terms of $\cos \theta$. What is the probability density function in
terms of $\theta$ itself?
\end{description}

\noindent
It will be most convenient, however, to stick to the density
function~(\ref{density}) in terms of $x = \cos \theta$.

\begin{description}
\item[b.] Build a function {\tt randangle} that takes
parameters {\tt alpha}, {\tt m}, and {\tt n}, and returns the matrix
{\tt x} of size {\tt m} $\times$ {\tt n} with (pseudo-)random values
drawn according to the distribution~(\ref{density}).

{\tt function x = randangle(alpha,m,n);}

Try to use {\sc Matlab}'s matrix routines as much as you can. Check
your implementation by making a histogram of many randomly drawn
values.
\end{description}

\noindent
Our (artificial) data set consists of the following $n=100$ observations:
{\scriptsize
\[
\begin{array}{rlrrrrrrrrrrr}
\stheta = &
%[ & -1.8859 & -1.6499 & 1.8747 & -2.3665 & -1.4153 & 2.0805 \\
%& & -1.3670 & 2.0465 & -1.2071 & -2.1666 & 1.8962 & -2.3751 \\
%& & -0.8423 & 0.4954 & 0.5686 & 0.5676 & -2.6372 & 1.9737 \\
%& & 1.3721 & 2.4183 & -0.8377 & 1.3099 & 0.6244 & -1.3559 \\
%& & 0.9528 & 1.0299 & -2.1901 & -1.3679 & -1.6277 & -1.6336 & ];\end{array}
%\]
%
[& 2.6764 & 0.8894 & 1.4354 & 1.5006 & 0.9046 &
-1.3146 & 1.6474 & -0.1975 & -1.2747 & -0.9988 \\
& & 1.1555 & -1.6281 & 2.1967 & -1.0610 & 1.2414 &
0.6162 & -2.9699 & 1.5531 & 2.5216 & 2.1182 \\
& & -1.8529 & -1.4210 & 1.9418 & 1.4794 & -1.7858 &
1.8154 & 2.8942 & -0.9977 & -0.5202 & -2.0464 \\
& & 1.5976 & 1.7503 & 2.2237 & -2.0187 & -0.3506 &
0.7647 & -2.4849 & 1.1087 & -0.8247 & 0.8363 \\
& & -0.4932 & -0.7828 & 2.3109 & 0.5433 & -0.0738 &
2.4771 & 2.4685 & 1.3219 & -1.2041 & -1.2495 \\
& & -0.1149 & -1.3569 & -1.7768 & -1.3162 & -1.3235 &
0.4888 & 0.4864 & 1.9602 & -2.7259 & -1.3423 \\
& & 0.8733 & 2.2035 & -1.8747 & 1.0333 & -0.8709 &
2.0805 & 2.8855 & -3.0309 & 1.7713 & 0.5745 \\
& & 1.0792 & 0.7908 & -1.6818 & 2.0775 & 0.2353 &
1.5304 & -1.8393 & -1.7928 & -1.9249 & 2.4183 \\
& & 0.8377 & -0.4723 & -0.1739 & -1.3559 & 1.8984 &
1.4318 & 1.1355 & -1.0589 & 0.9312 & 2.7828 \\
& & -1.2460 & 2.6038 & 1.6926 & 0.9147 & 2.3276 &
-1.1876 & 0.8835 & -2.1147 & 1.5013 & -2.7682 & ];\end{array}
\]
}
This data can also be found on the %course 
website
{\verb#http://www.mbfys.kun.nl/~tom/teaching.html#} or downloaded directly
from {\verb#http://www.mbfys.kun.nl/~tom/files/angulardata.dat#}.

\begin{description}
\item[c.] Use the method of moments to estimate $\alpha$ from this data.
\end{description}

\begin{description}
\item[d.] Use the parametric bootstrap method to generate $B = 10000$
samples from the (approximate) sampling distribution of the method of
moments estimate $\halpha$. Draw a histogram of these samples.
\end{description}

\begin{description}
\item[e.] Compute the mean and standard deviation of the sampling
distribution, both from your bootstrap samples and analytically. Are these
consistent? Is the method of moment estimate unbiased in this case? How
would you report your estimate of the parameter $\alpha$ given these
results?
\end{description}

\begin{description}
\item[f.] Plot the log likelihood for different values of $\alpha$.
Give an analytic expression for the maximum likelihood estimate.
Use either {\sc Matlab}'s {\tt fzero} or one of the routines you yourself
constructed in Exercise~1 (e.g., the bisection routine) to find the maximum
likelihood estimate for the given data set.
\end{description}

\begin{description}
\item[g.] Use the parametric bootstrap to generate 10000 samples of the
approximate sampling distribution of the maximum likelihood estimate.
Is the maximum likelihood estimate in theory biased or unbiased? What do
the bootstrap samples suggest? Compare the variance of the ``maximum
likelihood sampling distribution'' with the ``method of moments sampling
distribution''.
\end{description}

\begin{description}
\item[h. (optional)] Find a nice way to visualize your result.
\end{description}
\newpage

\section*{Hints for Exercise~4.}

\begin{description}
\item[a.] See the syllabus, section~2.3.3.
Check that
\[ \int_{-1}^1 d \cos \theta \: f_{\cos \theta}(\cos \theta | \alpha) = 1
\]
and also
\[
\int_{-\pi}^\pi d\theta \: f_\theta(\theta|\alpha) = 1 \: .
\]
Note that the lengths of the arrows in the figure are proportional to
$f_{\cos \theta}(\cos \theta | \alpha)$. Whereas $f_{\cos \theta}$ has a mode at 0, $f_{\theta}$ 
does not. 
\end{description}

\begin{description}
\item[b.] There are several ways of doing this, a straightforward
approach uses the following proposition:
\begin{proposition}
Let $U$ be uniform on $[0,1]$, and let $X = F^{-1}(U)$. Then the cumulative
distribution function of $X$ is $F$.
\end{proposition}
\begin{proof}
$ P(X \le x) = P(F^{-1}(U) \le x) = P(U \le F(x)) = F(x) $
\end{proof}
\begin{enumerate}
\item {\sc Matlab}'s {\tt rand} generates uniform (pseudo) random values
$x$ between 0 and 1.
\item Compute $F(x)$ and $F^{-1}(x)$ for the density function~(\ref{density}).
Although in principle $F$ does not have a unique inverse you can 
reason which one is appropriate.
\item Although you can implement the routine using for-loops, {\sc
Matlab} works a lot faster if you manipulate entire matrices instead
of individual entries in for-loops. You can e.g. use {\tt rand} to generate a
matrix of (pseudo) random numbers, and make $F^{-1}$ work on matrices.
%Or alternatively you can generate a vector of the appropriate size
%using {\tt rand}, make $F^{-1}$ work on vectors, and use {\tt reshape}
%to obtain a matrix of the required size.
\end{enumerate}

\comment{
\item[b.] We'll take a couple of small steps. If you cannot get this
to work in a reasonable amount of time (an hour or so), contact Onno and
ask him to give you a solution: you'll need it later on.
\begin{enumerate}
\item {\sc Matlab}'s {\tt rand} generates uniform (pseudo) random values
$x$ between 0 and 1. Check that $y = 2x - 1$ is then uniformly distributed
between -1 and 1.
\item Consider two independent random variables $y_1$ and $y_2$, uniformly
distributed between -1 and 1. What is the density function of
$y = max(y_1,y_2)$ and $y = min(y_1,y_2)$? This observation should suggest
to you how to generate random variables $x$ between $-1$ and $1$ that
are distributed according to either $f(x) = (1+x)/2$ or $f(x) = (1-x)/2$.
Build a function generating (a matrix of) such random variables and draw
a histogram to check your implementation.
\item Consider two distributions $f_1(x)$ and $f_2(x)$ and suppose that we
need random values from
\[
f(x) = \gamma f_1(x) + (1-\gamma) f_2(x) \: ,
\]
with $0 \leq \gamma \leq 1$. Show that we can do so by drawing from $f_1(x)$
with probability $\gamma$ and from $f_2(x)$ with probability $1-\gamma$.
\item With probability 0.7 a random variable, uniformly distributed between
0 and 1, is smaller than 0.7. How can you use {\tt rand} to generate a
random value that equals $1$ with probability $\gamma$ and $0$ with
probability $1-\gamma$?
\item Combine these ideas to draw values from
\[
f(x|\alpha) = {1 + \alpha x \over 2} = \left\{
\begin{array}{cc}
\displaystyle
(1-|\alpha|) {1 \over 2} + |\alpha| {x + 1 \over 2} & \mbox{~for~~}
\alpha \geq 0 \\[3mm]
\displaystyle
(1-|\alpha|) {1 \over 2} + |\alpha| {1 - x \over 2} & \mbox{~for~~}
\alpha \leq 0 \end{array} \right. \: .
\]
\end{enumerate}
In the implementation itself, use matrices as much as practically
feasible. For example, code in your routine might include something like
\begin{verbatim}
xx = 2*rand(m,n,2)-1;
if alpha > 0,
  x1 = max(xx,[],3);
else
  x1 = min(xx,[],3);
end
\end{verbatim}
with {\tt m} and {\tt n} the dimensions of the matrix and the construction
{\tt max(xx,[],3)} to take the maximum over the third (added) dimension.
(see {\tt help max}).}
\end{description}

\begin{description}
\item[c.] Store the data from the website locally and read it into {\sc Matlab}
using {\tt load angulardata.dat}. This gives you a variable named
{\tt angulardata} with angles $\theta$.

Express the mean of the density~(\ref{density}) in terms of the
parameter $\alpha$. Transform the angles $\theta$ into $x = \cos \theta$,
compute the sample average, and deduce $\halpha$. See section~4.3 in the
syllabus (be careful: in the syllabus $\theta$ stands for the parameter(s)
of the probability model, not for the data).
\end{description}

\begin{description}
\item[d.] The parametric bootstrap is described in section~4.6 of the
syllabus and should be relatively straightforward to implement given this
description:
\begin{itemize}
\item Generate many different samples $\sx_b$ of length $n=100$ given the method
of moment estimate $\halpha$ and using the routine {\tt randangle} that you
have constructed before.
\item Compute a new method of moment estimate $\halpha_b$ for each of these
samples.
\end{itemize}
Be careful: {\sc Matlab} also has a bootstrap function
(called {\tt bootstrp}), but this is an implementation of the so-called
nonparametric bootstrap method that will be discussed in chapter~7.
\end{description}

\begin{description}
\item[e.] Following example 4.1 on the sampling distribution for the Poisson
distribution in section 4.4 of the syllabus, you should be able to compute
the mean and variance of the sampling distribution for the method of moment
estimate in this case. Note that computing the exact sampling distribution 
is much more complicated here. The notion of unbiasedness is discussed in
both section 4.4 and section 4.8.
\end{description}


\begin{description}
\item[f.] Maximum likelihood estimation is explained in section~4.7 of the
syllabus. Taking the derivative of the log likelihood with respect to $\alpha$,
you will find a nonlinear equation for $\alpha$ that must be solved
iteratively. To this end, you can use {\sc Matlab}'s {\tt fzero}. The most
elegant way is to first construct an m-file that implements the derivative
of the log likelihood with respect to the parameter $\alpha$ and takes as
inputs this parameter $\alpha$ and the data vector $\sx$. The name of this
function, e.g., {\tt gradangular} is then given to {\tt fzero} (see {\tt
help fzero} for the correct syntax; note that the name of the function should
be between `'). For example, a call could look like {\tt alpha =
fzero(`gradangular',alpha0,[],x);}, with {\tt alpha0} an initial estimate
(e.g., the method of moment estimate) and {\tt x} the data vector $\sx$.
Another alternative is to minimize ``minus the log likelihood'' with
{\sc Matlab}'s {\tt fminunc}. However, although probably a little faster
especially when gradient information is included, the syntax is somewhat
more complicated. For example, when incorporating gradient information
the function to be minimized (minus the log likelihood) should return the
function value itself as its first argument and its gradient as the second
argument. Furthermore, options have to be included that can be set with
something like {\tt options = optimset('GradObj','on');}.
\end{description}

\begin{description}
\item[g.] Bootstrapping proceeds as in {\bf d.}, but is now slightly more
complicated since we have to use some iterative procedure as a subroutine
to find the maximum likelihood estimate $\halpha_b$ given bootstrap sample
$\sx_b$. Drawing 10000 samples might take some time, so perhaps you want
to add some print statements using {\tt disp} or {\tt fprintf} to keep track
of the progress.

Note that the question is whether the maximum likelihood estimate
is biased or unbiased in this specific finite-sample case, i.e., not whether
it is asymptotically unbiased.
\end{description}

\begin{description}
\item[h.] See what this does:
\begin{verbatim}
clf
hold on     % little trick, gridlines appear when left out
alpha = 0.6;
theta = -pi:0.01:pi;
f = (1 + alpha*cos(theta))/2;
polar(theta,f,'--');
axis off
\end{verbatim}
\end{description}


\end{document}
