% unit12_cluster/prob/prob_clustering.tex

\documentclass[11pt]{article}

\usepackage{fullpage}
\usepackage{amsmath, amssymb, bm, cite, epsfig, psfrag}
\usepackage{graphicx}
\usepackage{float}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{listings}
\usepackage{cite}
\usepackage{hyperref}
\usepackage{tikz}
\usepackage{enumerate}
\usepackage{listings}
\usepackage{mathtools}
\lstloadlanguages{Python}
\usetikzlibrary{shapes,arrows}
%\usetikzlibrary{dsp,chains}

% Fixed 9pt typewriter (txtt, T1-encoded) fonts for code listings:
%   \ttb -- bold weight, used for keywords and emphasized identifiers
%   \ttm -- medium weight, used as the basic listing font
\DeclareFixedFont{\ttb}{T1}{txtt}{bx}{n}{9} % for bold
\DeclareFixedFont{\ttm}{T1}{txtt}{m}{n}{9}  % for normal
% Defining colors
% These are consumed by \pythonstyle below:
%   deepblue   -- keywords
%   deepred    -- emphasized identifiers
%   deepgreen  -- comments and string literals
%   backcolour -- listing background
\usepackage{color}
\definecolor{deepblue}{rgb}{0,0,0.5}
\definecolor{deepred}{rgb}{0.6,0,0}
\definecolor{deepgreen}{rgb}{0,0.5,0}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

%\restylefloat{figure}
%\theoremstyle{plain}      \newtheorem{theorem}{Theorem}
%\theoremstyle{definition} \newtheorem{definition}{Definition}

% General-purpose shorthand macros.
% NOTE: all of these use \def, which replaces any existing definition of the
% same name without a warning.

% Partial-derivative symbol and math-style switches.
\def\del{\partial}
\def\ds{\displaystyle}
\def\ts{\textstyle}
% Begin/end shortcuts for numbered equations and for eqnarray (numbered and
% starred/unnumbered variants).
\def\beq{\begin{equation}}
\def\eeq{\end{equation}}
\def\beqa{\begin{eqnarray}}
\def\eeqa{\end{eqnarray}}
\def\beqan{\begin{eqnarray*}}
\def\eeqan{\end{eqnarray*}}
\def\nn{\nonumber}
% Upright "binomial" operator name.
\def\binomial{\mathop{\mathrm{binomial}}}
% One half: \half forces text style (small fraction even in displays),
% \Half follows the surrounding math style.
\def\half{{\ts\frac{1}{2}}}
\def\Half{{\frac{1}{2}}}
% Blackboard-bold number sets.
\def\N{{\mathbb{N}}}
\def\Z{{\mathbb{Z}}}
\def\Q{{\mathbb{Q}}}
\def\R{{\mathbb{R}}}
\def\C{{\mathbb{C}}}
% Upright operator names built with \mathop, so subscripts are placed as
% limits (underneath) in display style.
\def\argmin{\mathop{\mathrm{arg\,min}}}
\def\argmax{\mathop{\mathrm{arg\,max}}}
% NOTE(review): \span is left disabled, presumably because \span is a TeX
% primitive that must not be redefined -- confirm before re-enabling.
%\def\span{\mathop{\mathrm{span}}}
\def\diag{\mathop{\mathrm{diag}}}
% \x is the multiplication sign.
\def\x{\times}
% Limits as n tends to infinity.
\def\limn{\lim_{n \rightarrow \infty}}
\def\liminfn{\liminf_{n \rightarrow \infty}}
\def\limsupn{\limsup_{n \rightarrow \infty}}
% Author-name shorthand (text mode).
\def\GV{Guo and Verd{\'u}}
% Separators surrounded by thin spaces: vertical bar and semicolon.
\def\MID{\,|\,}
\def\MIDD{\,;\,}

% Theorem-like environments. No shared-counter argument is given, so each
% environment is numbered independently of the others.
\newtheorem{proposition}{Proposition}
\newtheorem{definition}{Definition}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{assumption}{Assumption}
\newtheorem{claim}{Claim}
% End-of-proof mark: a box pushed to the right margin.
% NOTE: amsthm (loaded above) also provides \qed; this \def replaces that
% definition without warning.
\def\qed{\mbox{} \hfill $\Box$}
% Length unit used for picture-mode coordinates.
\setlength{\unitlength}{1mm}

% Scalar symbols with a wide hat (\Xhat-style estimates) or an overline.
\def\bhat{\widehat{b}}
\def\ehat{\widehat{e}}
\def\phat{\widehat{p}}
\def\qhat{\widehat{q}}
\def\rhat{\widehat{r}}
\def\shat{\widehat{s}}
\def\uhat{\widehat{u}}
\def\ubar{\overline{u}}
\def\vhat{\widehat{v}}
\def\xhat{\widehat{x}}
\def\xbar{\overline{x}}
\def\zhat{\widehat{z}}
\def\zbar{\overline{z}}
% Arrow shorthands. \ra and \arr (below) both expand to \rightarrow.
\def\la{\leftarrow}
\def\ra{\rightarrow}
% Metric names set as small sans-serif labels.
\def\MSE{\mbox{\small \sffamily MSE}}
\def\SNR{\mbox{\small \sffamily SNR}}
\def\SINR{\mbox{\small \sffamily SINR}}
\def\arr{\rightarrow}
% Expectation symbol, and variance/trace names set as upright text boxes.
\def\Exp{\mathbb{E}}
\def\var{\mbox{var}}
\def\Tr{\mbox{Tr}}
% Tightly spaced index expressions t-1, t+1, T-1, T+1.
% NOTE: each line defines a macro (\tm, \tp, \Tm, \Tp) whose parameter text
% is the literal character "1"; it must therefore always be written with the
% trailing 1 (e.g. \tm1), otherwise TeX reports a "Use of ... doesn't match
% its definition" error.
\def\tm1{t\! - \! 1}
\def\tp1{t\! + \! 1}
\def\Tm1{T\! - \! 1}
\def\Tp1{T\! + \! 1}


% Calligraphic X (uses the old-style \cal font switch, kept local by the
% inner braces).
\def\Xset{{\cal X}}

% Bold upright Latin letters.
% Naming convention: <letter>bf is the bold letter, a "hat" suffix adds
% \widehat, a "bar" suffix adds \overline; \utildebf adds a tilde.
% Lower-case letters come first, upper-case letters follow.
% \Ahat, \Phat, \Rhat, \Xhat and \Zhat are hatted but NOT bold.
% \one is a bold digit 1.
\newcommand{\one}{\mathbf{1}}
\newcommand{\abf}{\mathbf{a}}
\newcommand{\bbf}{\mathbf{b}}
\newcommand{\dbf}{\mathbf{d}}
\newcommand{\ebf}{\mathbf{e}}
\newcommand{\gbf}{\mathbf{g}}
\newcommand{\hbf}{\mathbf{h}}
\newcommand{\pbf}{\mathbf{p}}
\newcommand{\pbfhat}{\widehat{\mathbf{p}}}
\newcommand{\qbf}{\mathbf{q}}
\newcommand{\qbfhat}{\widehat{\mathbf{q}}}
\newcommand{\rbf}{\mathbf{r}}
\newcommand{\rbfhat}{\widehat{\mathbf{r}}}
\newcommand{\sbf}{\mathbf{s}}
\newcommand{\sbfhat}{\widehat{\mathbf{s}}}
\newcommand{\ubf}{\mathbf{u}}
\newcommand{\ubfhat}{\widehat{\mathbf{u}}}
\newcommand{\utildebf}{\tilde{\mathbf{u}}}
\newcommand{\vbf}{\mathbf{v}}
\newcommand{\vbfhat}{\widehat{\mathbf{v}}}
\newcommand{\wbf}{\mathbf{w}}
\newcommand{\wbfhat}{\widehat{\mathbf{w}}}
\newcommand{\xbf}{\mathbf{x}}
\newcommand{\xbfhat}{\widehat{\mathbf{x}}}
\newcommand{\xbfbar}{\overline{\mathbf{x}}}
\newcommand{\ybf}{\mathbf{y}}
\newcommand{\zbf}{\mathbf{z}}
\newcommand{\zbfbar}{\overline{\mathbf{z}}}
\newcommand{\zbfhat}{\widehat{\mathbf{z}}}
\newcommand{\Ahat}{\widehat{A}}
\newcommand{\Abf}{\mathbf{A}}
\newcommand{\Bbf}{\mathbf{B}}
\newcommand{\Cbf}{\mathbf{C}}
\newcommand{\Bbfhat}{\widehat{\mathbf{B}}}
\newcommand{\Dbf}{\mathbf{D}}
\newcommand{\Gbf}{\mathbf{G}}
\newcommand{\Hbf}{\mathbf{H}}
\newcommand{\Ibf}{\mathbf{I}}
\newcommand{\Kbf}{\mathbf{K}}
\newcommand{\Pbf}{\mathbf{P}}
\newcommand{\Phat}{\widehat{P}}
\newcommand{\Qbf}{\mathbf{Q}}
\newcommand{\Rbf}{\mathbf{R}}
\newcommand{\Rhat}{\widehat{R}}
\newcommand{\Sbf}{\mathbf{S}}
\newcommand{\Ubf}{\mathbf{U}}
\newcommand{\Vbf}{\mathbf{V}}
\newcommand{\Wbf}{\mathbf{W}}
\newcommand{\Xhat}{\widehat{X}}
\newcommand{\Xbf}{\mathbf{X}}
\newcommand{\Ybf}{\mathbf{Y}}
\newcommand{\Zbf}{\mathbf{Z}}
\newcommand{\Zhat}{\widehat{Z}}
\newcommand{\Zbfhat}{\widehat{\mathbf{Z}}}
% Bold Greek letters. Some are built with \boldsymbol and some with \bm;
% a "hat" suffix adds \widehat.
\def\alphabf{{\boldsymbol \alpha}}
\def\betabf{{\boldsymbol \beta}}
\def\betabfhat{{\widehat{\bm{\beta}}}}
\def\epsilonbf{{\boldsymbol \epsilon}}
\def\mubf{{\boldsymbol \mu}}
\def\lambdabf{{\boldsymbol \lambda}}
\def\etabf{{\boldsymbol \eta}}
\def\xibf{{\boldsymbol \xi}}
\def\taubf{{\boldsymbol \tau}}
% Non-bold hatted sigma.
\def\sigmahat{{\widehat{\sigma}}}
\def\thetabf{{\bm{\theta}}}
\def\thetabfhat{{\widehat{\bm{\theta}}}}
% Non-bold hatted theta.
\def\thetahat{{\widehat{\theta}}}
% \mubar is an overlined mu; \muavg is a plain, undecorated mu.
\def\mubar{\overline{\mu}}
\def\muavg{\mu}
\def\sigbf{\bm{\sigma}}
% Italicized "et al." for citations in running text.
\def\etal{\emph{et al.}}
% Fraktur G, calligraphic P.
\def\Ggothic{\mathfrak{G}}
\def\Pset{{\mathcal P}}
% Conditional-style brackets "( #1 | #2 )" with \big- or \Big-sized
% delimiters; the \! pairs pull the bar closer to its neighbours.
\newcommand{\bigCond}[2]{\bigl({#1} \!\bigm\vert\! {#2} \bigr)}
\newcommand{\BigCond}[2]{\Bigl({#1} \!\Bigm\vert\! {#2} \Bigr)}
% Superscript sans-serif T and H (include the ^ themselves, so write x\tran).
\newcommand{\tran}{^{\text{\sf T}}}
\newcommand{\herm}{^{\text{\sf H}}}
% Argument wrapped in angle brackets.
\newcommand{\bkt}[1]{{\langle #1 \rangle}}
% Calligraphic N.
\def\Norm{{\mathcal N}}
% NOTE(review): \vmult expands to a bare "." and \vdiv to "./"; these look
% like element-wise operator notation -- confirm the intended symbols.
\newcommand{\vmult}{.}
\newcommand{\vdiv}{./}


% Python style for highlighting
% \pythonstyle applies the listings settings shared by the python
% environment, \pythonexternal and \pycode defined below: Python syntax,
% the 9pt \ttm font, a tinted background, blue bold keywords (with "self"
% added as a keyword), red bold emphasis for MyClass and __init__, and green
% comments and strings. Spaces inside strings are not drawn as visible marks.
% It calls \lstset, so the settings last until the end of the current group.
\newcommand\pythonstyle{\lstset{
language=Python,
backgroundcolor=\color{backcolour},
commentstyle=\color{deepgreen},
basicstyle=\ttm,
otherkeywords={self},             % Add keywords here
keywordstyle=\ttb\color{deepblue},
emph={MyClass,__init__},          % Custom highlighting
emphstyle=\ttb\color{deepred},    % Custom highlighting style
stringstyle=\color{deepgreen},
%frame=tb,                         % Any extra options here
showstringspaces=false            %
}}

% Python environment
% Usage: \begin{python}[<listings options>] ... \end{python}
% The optional argument (empty by default) is passed to \lstset AFTER
% \pythonstyle, so per-listing options override the shared defaults.
% No code is run at the end of the environment.
\lstnewenvironment{python}[1][]
{
\pythonstyle
\lstset{#1}
}
{}

% Python for external files
% Usage: \pythonexternal[<listings options>]{<file>}
% Typesets the contents of <file> with the shared Python style. The extra
% pair of braces confines the \lstset changes made by \pythonstyle to this
% one listing.
\newcommand\pythonexternal[2][]{{
\pythonstyle
\lstinputlisting[#1]{#2}}}

% Python for inline
% Usage: \pycode{<code>}
% Typesets a short code fragment inline with the shared Python style; the
% extra braces keep the style change local. The fragment is handed to
% \lstinline delimited by "!", so it must not itself contain "!".
\newcommand\pycode[1]{{\pythonstyle\lstinline!#1!}}

\begin{document}

\title{Introduction to Machine Learning\\
$K$-means and Clustering Problems}
\author{Prof.~Sundeep Rangan}
\date{}

\maketitle

\begin{enumerate}


\item \label{prob:km}
You are given five data samples:

\begin{center}
\begin{tabular}{|c|c|c|c|c|c|}  \hline
$i$ & 1 & 2 & 3 & 4 & 5 \\ \hline
$x_{i1}$ & 0 & 1 & 0 & 2 & 2 \\ \hline
$x_{i2}$ & 0 & 0 & 1 & 2 & 3 \\ \hline
\end{tabular}
\end{center}


\begin{enumerate}[(a)]


\item Draw the five points.

\item Starting with $K=2$ cluster centers at $(0,0)$ and $(1,0)$,
what are the cluster assignments and new cluster centers after one
iteration of $K$-means?
\end{enumerate}

\item \emph{$K$-means for outlier detection.}  Write a
function for outlier detection:
\begin{python}
    def outlier_detect(Xtr,Xts,nc,t):
        ...
        return outlier
\end{python}
The function should:
\begin{itemize}
  \item Perform $K$-means clustering on the training data \pycode{Xtr}
    with \pycode{nc} clusters;
  \item Given the matrix of test data \pycode{Xts},
    set the output \pycode{outlier[i]=1} if
    the sample \pycode{Xts[i,:]} is farther than
    distance \pycode{t} from every cluster center,
    and \pycode{outlier[i]=0} otherwise.
\end{itemize}
Try to avoid for loops.
You may assume you have the following functions:
\begin{python}
   km = KMeans(n_clusters=nc)  # Creates a K-Means object
   km.fit(X)  # Fits the k-means clusters
   km.cluster_centers_   # Returns the cluster centers
                         # (one cluster per row)
\end{python}

\item \emph{Initialization.}  Write a few lines of Python
code to initialize $K$-means by selecting $K$
random samples of the training data as the cluster centers.
Make sure you do not pick the same sample twice.

\item \emph{Clustering as pre-processing.}
Suppose we want to cluster data and then
fit a linear model in each cluster.
You are given training data \pycode{Xtr,ytr}
and test data \pycode{Xts,yts} for a regression problem.
Write code to do the following:
\begin{itemize}
  \item Perform $K$-means clustering on the training data \pycode{Xtr} with a given number \pycode{nc} of clusters;
  \item In each cluster in the training data,
    fit a linear model;
  \item Compute the predicted outputs \pycode{yhat_ts}
  and the mean squared error of the model on the test data.
\end{itemize}
You may assume you have the following functions:
\begin{python}
   km = KMeans(n_clusters=nc)  # Creates a K-Means object
   km.fit(X)       # Fits the k-means clusters
   km.predict(X)   # Finds the index of the closest cluster

   reg = LinearRegression()   # Creates a linear regression object
   reg.fit(X,y)  # Fits the linear model
   yhat = reg.predict(X)  # Predicts the output

\end{python}
Note:  You may need a list of regression objects.


\end{enumerate}
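\end{document}
% NOTE: LaTeX stops reading at the \end{document} above, so the problems
% that follow are not typeset.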

\item Figure~\ref{fig:samples} shows a set of samples to be clustered. Show the results of the $K$-means algorithm over successive iterations, starting with the initial centroids indicated in the figure. You can perform the nearest-neighbor partition and the centroid update approximately by ``eyeballing''.


\begin{figure}[h]
\centering
\includegraphics[width=0.5\columnwidth]{samples.png}
\caption{The set of samples to be clustered} \label{fig:samples}
\end{figure}

  \item\label{kmeans-NN}
 Suppose you have conducted a clustering analysis for a dataset with each sample described by $D$ features, and you used the $K$-means algorithm to derive $K$ clusters and determined the cluster model parameters (including the centroids of the $K$ clusters). Given a test dataset containing $N$ samples, you want to classify each sample into one of the clusters using the nearest neighbor rule. How many computations are needed? For simplicity, for this and all following problems, only count multiplications (consider the square operation as one multiplication).

\item\label{kmeans}
Suppose you are given $N$ samples each described by $D$ features, and you are asked to cluster them into $K$ clusters using the $K$-means algorithm. Suppose you run the $K$-means iteration $T$ times. How many computations are needed?


 \item (Optional)
  Suppose you have conducted a clustering analysis for a dataset with each sample described by $D$ features, and you used the EM-GMM algorithm to derive $K$ clusters and determined the cluster model parameters (including the prior probabilities, centroids and covariance matrices of the $K$ clusters). Given a test dataset containing $N$ samples, you want to classify each sample into the cluster that has the highest posterior probability. How many computations are needed?

 \item (Optional)
Suppose you are given $N$ samples each described by $D$ features, and you are asked to cluster them into $K$ clusters using the EM-GMM algorithm. Suppose you run the EM iteration $T$ times. How many computations are needed?

\end{enumerate}
  \end{document}