\documentclass[11pt]{article}
\usepackage{fullpage}
\usepackage{amsmath, amssymb, bm, cite, epsfig, psfrag}
\usepackage{graphicx}
\usepackage{float}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{listings}
\usepackage{hyperref}
\usepackage{tikz}
\usepackage{enumerate}
\usepackage{mathtools}
\lstloadlanguages{Python}
\usetikzlibrary{shapes,arrows}
%\usetikzlibrary{dsp,chains}
\DeclareFixedFont{\ttb}{T1}{txtt}{bx}{n}{9} % for bold
\DeclareFixedFont{\ttm}{T1}{txtt}{m}{n}{9} % for normal
% Defining colors
\usepackage{color}
\definecolor{deepblue}{rgb}{0,0,0.5}
\definecolor{deepred}{rgb}{0.6,0,0}
\definecolor{deepgreen}{rgb}{0,0.5,0}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
%\restylefloat{figure}
%\theoremstyle{plain} \newtheorem{theorem}{Theorem}
%\theoremstyle{definition} \newtheorem{definition}{Definition}
\def\del{\partial}
\def\ds{\displaystyle}
\def\ts{\textstyle}
\def\beq{\begin{equation}}
\def\eeq{\end{equation}}
\def\beqa{\begin{eqnarray}}
\def\eeqa{\end{eqnarray}}
\def\beqan{\begin{eqnarray*}}
\def\eeqan{\end{eqnarray*}}
\def\nn{\nonumber}
\def\binomial{\mathop{\mathrm{binomial}}}
\def\half{{\ts\frac{1}{2}}}
\def\Half{{\frac{1}{2}}}
\def\N{{\mathbb{N}}}
\def\Z{{\mathbb{Z}}}
\def\Q{{\mathbb{Q}}}
\def\R{{\mathbb{R}}}
\def\C{{\mathbb{C}}}
\def\argmin{\mathop{\mathrm{arg\,min}}}
\def\argmax{\mathop{\mathrm{arg\,max}}}
%\def\span{\mathop{\mathrm{span}}}
\def\diag{\mathop{\mathrm{diag}}}
\def\x{\times}
\def\limn{\lim_{n \rightarrow \infty}}
\def\liminfn{\liminf_{n \rightarrow \infty}}
\def\limsupn{\limsup_{n \rightarrow \infty}}
\def\GV{Guo and Verd{\'u}}
\def\MID{\,|\,}
\def\MIDD{\,;\,}
\newtheorem{proposition}{Proposition}
\newtheorem{definition}{Definition}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{assumption}{Assumption}
\newtheorem{claim}{Claim}
\def\qed{\mbox{} \hfill $\Box$}
\setlength{\unitlength}{1mm}
\def\bhat{\widehat{b}}
\def\ehat{\widehat{e}}
\def\phat{\widehat{p}}
\def\qhat{\widehat{q}}
\def\rhat{\widehat{r}}
\def\shat{\widehat{s}}
\def\uhat{\widehat{u}}
\def\ubar{\overline{u}}
\def\vhat{\widehat{v}}
\def\xhat{\widehat{x}}
\def\xbar{\overline{x}}
\def\zhat{\widehat{z}}
\def\zbar{\overline{z}}
\def\la{\leftarrow}
\def\ra{\rightarrow}
\def\MSE{\mbox{\small \sffamily MSE}}
\def\SNR{\mbox{\small \sffamily SNR}}
\def\SINR{\mbox{\small \sffamily SINR}}
\def\arr{\rightarrow}
\def\Exp{\mathbb{E}}
\def\var{\mbox{var}}
\def\Tr{\mbox{Tr}}
\def\tm1{t\! - \! 1}
\def\tp1{t\! + \! 1}
\def\Tm1{T\! - \! 1}
\def\Tp1{T\! + \! 1}
\def\Xset{{\cal X}}
\newcommand{\one}{\mathbf{1}}
\newcommand{\abf}{\mathbf{a}}
\newcommand{\bbf}{\mathbf{b}}
\newcommand{\dbf}{\mathbf{d}}
\newcommand{\ebf}{\mathbf{e}}
\newcommand{\gbf}{\mathbf{g}}
\newcommand{\hbf}{\mathbf{h}}
\newcommand{\pbf}{\mathbf{p}}
\newcommand{\pbfhat}{\widehat{\mathbf{p}}}
\newcommand{\qbf}{\mathbf{q}}
\newcommand{\qbfhat}{\widehat{\mathbf{q}}}
\newcommand{\rbf}{\mathbf{r}}
\newcommand{\rbfhat}{\widehat{\mathbf{r}}}
\newcommand{\sbf}{\mathbf{s}}
\newcommand{\sbfhat}{\widehat{\mathbf{s}}}
\newcommand{\ubf}{\mathbf{u}}
\newcommand{\ubfhat}{\widehat{\mathbf{u}}}
\newcommand{\utildebf}{\tilde{\mathbf{u}}}
\newcommand{\vbf}{\mathbf{v}}
\newcommand{\vbfhat}{\widehat{\mathbf{v}}}
\newcommand{\wbf}{\mathbf{w}}
\newcommand{\wbfhat}{\widehat{\mathbf{w}}}
\newcommand{\xbf}{\mathbf{x}}
\newcommand{\xbfhat}{\widehat{\mathbf{x}}}
\newcommand{\xbfbar}{\overline{\mathbf{x}}}
\newcommand{\ybf}{\mathbf{y}}
\newcommand{\zbf}{\mathbf{z}}
\newcommand{\zbfbar}{\overline{\mathbf{z}}}
\newcommand{\zbfhat}{\widehat{\mathbf{z}}}
\newcommand{\Ahat}{\widehat{A}}
\newcommand{\Abf}{\mathbf{A}}
\newcommand{\Bbf}{\mathbf{B}}
\newcommand{\Cbf}{\mathbf{C}}
\newcommand{\Bbfhat}{\widehat{\mathbf{B}}}
\newcommand{\Dbf}{\mathbf{D}}
\newcommand{\Gbf}{\mathbf{G}}
\newcommand{\Hbf}{\mathbf{H}}
\newcommand{\Ibf}{\mathbf{I}}
\newcommand{\Kbf}{\mathbf{K}}
\newcommand{\Pbf}{\mathbf{P}}
\newcommand{\Phat}{\widehat{P}}
\newcommand{\Qbf}{\mathbf{Q}}
\newcommand{\Rbf}{\mathbf{R}}
\newcommand{\Rhat}{\widehat{R}}
\newcommand{\Sbf}{\mathbf{S}}
\newcommand{\Ubf}{\mathbf{U}}
\newcommand{\Vbf}{\mathbf{V}}
\newcommand{\Wbf}{\mathbf{W}}
\newcommand{\Xhat}{\widehat{X}}
\newcommand{\Xbf}{\mathbf{X}}
\newcommand{\Ybf}{\mathbf{Y}}
\newcommand{\Zbf}{\mathbf{Z}}
\newcommand{\Zhat}{\widehat{Z}}
\newcommand{\Zbfhat}{\widehat{\mathbf{Z}}}
\def\alphabf{{\boldsymbol \alpha}}
\def\betabf{{\boldsymbol \beta}}
\def\betabfhat{{\widehat{\bm{\beta}}}}
\def\epsilonbf{{\boldsymbol \epsilon}}
\def\mubf{{\boldsymbol \mu}}
\def\lambdabf{{\boldsymbol \lambda}}
\def\etabf{{\boldsymbol \eta}}
\def\xibf{{\boldsymbol \xi}}
\def\taubf{{\boldsymbol \tau}}
\def\sigmahat{{\widehat{\sigma}}}
\def\thetabf{{\bm{\theta}}}
\def\thetabfhat{{\widehat{\bm{\theta}}}}
\def\thetahat{{\widehat{\theta}}}
\def\mubar{\overline{\mu}}
\def\muavg{\mu}
\def\sigbf{\bm{\sigma}}
\def\etal{\emph{et al.}}
\def\Ggothic{\mathfrak{G}}
\def\Pset{{\mathcal P}}
\newcommand{\bigCond}[2]{\bigl({#1} \!\bigm\vert\! {#2} \bigr)}
\newcommand{\BigCond}[2]{\Bigl({#1} \!\Bigm\vert\! {#2} \Bigr)}
\newcommand{\tran}{^{\text{\sf T}}}
\newcommand{\herm}{^{\text{\sf H}}}
\newcommand{\bkt}[1]{{\langle #1 \rangle}}
\def\Norm{{\mathcal N}}
\newcommand{\vmult}{.}
\newcommand{\vdiv}{./}
% Python style for highlighting
\newcommand\pythonstyle{\lstset{
language=Python,
backgroundcolor=\color{backcolour},
commentstyle=\color{deepgreen},
basicstyle=\ttm,
otherkeywords={self}, % Add keywords here
keywordstyle=\ttb\color{deepblue},
emph={MyClass,__init__}, % Custom highlighting
emphstyle=\ttb\color{deepred}, % Custom highlighting style
stringstyle=\color{deepgreen},
%frame=tb, % Any extra options here
showstringspaces=false %
}}
% Python environment
\lstnewenvironment{python}[1][]
{
\pythonstyle
\lstset{#1}
}
{}
% Python for external files
\newcommand\pythonexternal[2][]{{
\pythonstyle
\lstinputlisting[#1]{#2}}}
% Python for inline
\newcommand\pycode[1]{{\pythonstyle\lstinline!#1!}}
\begin{document}
\title{Introduction to Machine Learning\\
Problems: Logistic Regression}
\author{Prof. Sundeep Rangan}
\date{}
\maketitle
\begin{enumerate}
\item Suggest possible response variables and predictors for the following classification
problems. For each problem, indicate how many classes there are. There is no single
correct answer.
\begin{enumerate}[(a)]
\item Given an audio sample, detect the gender of the voice.
\item An electronic writing pad records the motion of a stylus,
and we wish to determine
which letter or number was written. Assume a segmentation algorithm
has already been run
that reliably indicates the beginning and end times of the writing
of each character.
\end{enumerate}
\item Suppose that a logistic regression model for a binary class
label $y=0,1$ is given by
\[
P(y=1|\xbf) = \frac{1}{1+e^{-z}}, \quad z = \beta_0 + \beta_1x_1 + \beta_2 x_2,
\]
where $\betabf = [1,2,3]\tran$. Describe the following sets:
\begin{enumerate}[(a)]
\item The set of $\xbf$ such that $P(y=1|\xbf) > P(y=0|\xbf)$.
\item The set of $\xbf$ such that $P(y=1|\xbf) > 0.8$.
\item The set of $x_1$ such that $P(y=1|\xbf) > 0.8$ and $x_2=0.5$.
\end{enumerate}
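As a quick numerical sanity check (not part of the problem), the probabilities above
can be evaluated in NumPy. The helper \pycode{prob_y1} below is our own name,
not a library routine:
\begin{python}
import numpy as np

beta = np.array([1, 2, 3])  # [beta0, beta1, beta2] from the problem

def prob_y1(x):
    # P(y=1|x) for a two-dimensional feature vector x
    z = beta[0] + beta[1]*x[0] + beta[2]*x[1]
    return 1/(1 + np.exp(-z))

# Part (a): P(y=1|x) > P(y=0|x) exactly when z > 0, i.e. on one
# side of the line 1 + 2*x1 + 3*x2 = 0.
# Part (b): P(y=1|x) > 0.8 exactly when z > ln(0.8/0.2) = ln(4).
print(prob_y1([0, 0]))  # z = 1, so this point lies on the y=1 side
\end{python}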
\item A data scientist is hired by a
political candidate to predict who will donate
money. The data scientist decides to use two predictors for each possible donor:
\begin{itemize}
\item $x_1$ = the income of the person (in thousands of dollars), and
\item $x_2$ = the number of websites with political views similar to the
candidate's that the person follows on Facebook.
\end{itemize}
To train the model, the scientist
solicits donations from a randomly selected subset
of people and records whether or not each person donates. She obtains the following data:
\begin{center}
\begin{tabular}{|l|c|c|c|c|c|} \hline
Income (thousands \$), $x_{i1}$ & 30 & 50 & 70 & 80 & 100 \\ \hline
Num websites, $x_{i2}$ & 0 & 1 & 1 & 2 & 1 \\ \hline
Donate (1=yes or 0=no), $y_i$ & 0 & 1 & 0 & 1 & 1 \\ \hline
\end{tabular}
\end{center}
\begin{enumerate}[(a)]
\item Draw a scatter plot of the data, labeling the two classes with different markers.
\item Find a linear classifier that makes at most one error on the training data.
The classifier should be of the form,
\[
\hat{y}_i = \begin{cases}
1 & \mbox{if } z_i > 0\\
0 & \mbox{if } z_i < 0,
\end{cases}
\quad
z_i = \wbf\tran\xbf_i + b.
\]
What is the weight vector $\wbf$ and bias $b$ in your classifier?
\item Now consider a logistic model of the form,
\[
P(y_i=1|\xbf_i) = \frac{1}{1+e^{-z_i}}, \quad z_i = \wbf\tran\xbf_i +b.
\]
Using $\wbf$ and $b$ from the previous part, which sample $i$ is the
\emph{least} likely (i.e.\ for which sample is $P(y_i|\xbf_i)$ smallest)?
If you set up the calculations correctly, you should not need a calculator.
\item Now consider a new set of parameters
\[
\wbf' = \alpha \wbf, \quad b'=\alpha b,
\]
where $\alpha>0$ is a scalar. Would using the new parameters
change the predicted values $\hat{y}_i$ in part (b)? Would they change the
likelihoods $P(y_i|\xbf_i)$ in part (c)? If they do not change, state why.
If they do change, qualitatively describe the change as a function of $\alpha$.
% \item Complete the following python function to generate a vector
% of random labels \pycode{y}, where \pycode{y[i]} uses the data
% record in \pycode{X[i,:]}, weight vector \pycode{w} and bias \pycode{b}.
% \begin{python}
% import numpy as np
% def gen_rand(X,w,b):
% ...
% return y
% \end{python}
\end{enumerate}
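The scatter plot in part (a) and the error count of any candidate classifier in
part (b) can be checked with a short script. The weights \pycode{w} and bias
\pycode{b} below are placeholders to be replaced with your own answer:
\begin{python}
import numpy as np
import matplotlib.pyplot as plt

# Training data from the table above
X = np.array([[30, 0], [50, 1], [70, 1], [80, 2], [100, 1]], dtype=float)
y = np.array([0, 1, 0, 1, 1])

# Part (a): scatter plot with a different marker for each class
for cls, marker in [(0, 'o'), (1, 'x')]:
    plt.scatter(X[y == cls, 0], X[y == cls, 1], marker=marker, label='y=%d' % cls)
plt.xlabel('Income (thousands $)')
plt.ylabel('Num websites')
plt.legend()
plt.show()

# Part (b): count training errors for a candidate (w, b)
w = np.array([0.0, 1.0])  # placeholder; substitute your answer
b = -0.5                  # placeholder; substitute your answer
yhat = (X @ w + b > 0).astype(int)
print('Training errors:', np.sum(yhat != y))
\end{python}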
\item Suppose we collect data for a group of students in a machine learning class
with variables $X_1 =$ hours studied, $X_2 =$ undergrad GPA, and $Y =$
whether the student receives an A. We fit a logistic regression and obtain estimated
coefficients $\beta_0 = -6$, $\beta_1 = 0.05$, $\beta_2 = 1$.
\begin{enumerate}
\item Estimate the probability that a student who studies for 40 hours and
has an undergrad GPA of 3.5 gets an A in the class.
\item How many hours would the student in part (a) need to study to
have a 50\% chance of getting an A in the class?
\end{enumerate}
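A short numerical check, assuming nothing beyond the coefficients given above:
\begin{python}
import numpy as np

beta0, beta1, beta2 = -6, 0.05, 1
z = beta0 + beta1*40 + beta2*3.5      # part (a): 40 hours, GPA 3.5
print(1/(1 + np.exp(-z)))             # estimated P(A)
print((0 - beta0 - beta2*3.5)/beta1)  # part (b): hours giving z = 0, i.e. a 50% chance
\end{python}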
\item The loss function for logistic regression for binary classification is the binary cross-entropy, defined as $$J(\betabf)=\sum_{i=1}^N\left[\ln\left(1+e^{z_i}\right)-y_iz_i\right],$$ where $z_i = \beta_0+\beta_1x_{i1}+\beta_2x_{i2}$ for two features $x_{i1}$ and $x_{i2}$.
\begin{enumerate}
\item What are the partial derivatives of $z_i$ with respect to $\beta_0$, $\beta_1$, and $\beta_2$?
\item Compute the partial derivatives of $J(\betabf)$ with respect to $\beta_0$, $\beta_1$, and $\beta_2$. You should use the chain rule of differentiation.
\item Can you find closed-form expressions for the optimal parameters $\hat{\beta}_0$, $\hat{\beta}_1$, and $\hat{\beta}_2$ by setting the derivatives of $J(\betabf)$ to zero? What methods can be used to minimize the loss function $J(\betabf)$?
\end{enumerate}
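Since part (c) points toward iterative methods, here is a minimal gradient-descent
sketch for $J(\betabf)$; the function name, step size, and iteration count are our
own choices, not prescribed by the problem:
\begin{python}
import numpy as np

def grad_descent(X, y, lr=0.01, iters=5000):
    # Minimize the binary cross-entropy J(beta) by gradient descent.
    # X is N x 2 with columns x_{i1}, x_{i2}; a ones column is added for beta0.
    A = np.column_stack([np.ones(len(y)), X])
    beta = np.zeros(3)
    for _ in range(iters):
        p = 1/(1 + np.exp(-A @ beta))  # sigma(z_i); note dJ/dz_i = sigma(z_i) - y_i
        beta -= lr * A.T @ (p - y)     # gradient step: A^T (p - y)
    return beta
\end{python}
No closed form exists because the sigmoid makes the stationarity equations nonlinear
in $\betabf$; gradient descent or Newton-type methods are the standard choices.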
\end{enumerate}
\end{document}