\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{fullpage}
\usepackage[capitalise,nameinlink]{cleveref}
\crefname{lemma}{Lemma}{Lemmas}
\usepackage{graphicx}
\crefname{fact}{Fact}{Facts}
\crefname{theorem}{Theorem}{Theorems}
\crefname{corollary}{Corollary}{Corollaries}
\crefname{claim}{Claim}{Claims}
\crefname{example}{Example}{Examples}
\crefname{problem}{Problem}{Problems}
\crefname{setting}{Setting}{Settings}
\crefname{definition}{Definition}{Definitions}
\crefname{assumption}{Assumption}{Assumptions}
\crefname{subsection}{Subsection}{Subsections}
\crefname{section}{Section}{Sections}
\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\newcommand{\eps}{\varepsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{
\hbox to 5.78in { {\bf CS 270: Combinatorial Algorithms and Data Structures
} \hfill #2 }
\vspace{4mm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
\hbox to 5.78in { {\em #3 \hfill #4} }
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture #1}}
\newtheorem{theorem}{Theorem}[section]
\newtheorem*{theorem*}{Theorem}
\newtheorem{itheorem}{Theorem}
\newtheorem{subclaim}{Claim}[theorem]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem*{proposition*}{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem*{lemma*}{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem*{conjecture*}{Conjecture}
\newtheorem{fact}[theorem]{Fact}
\newtheorem*{fact*}{Fact}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem*{exercise*}{Exercise}
\newtheorem{hypothesis}[theorem]{Hypothesis}
\newtheorem*{hypothesis*}{Hypothesis}
\newtheorem{conjecture}[theorem]{Conjecture}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{setting}[theorem]{Setting}
\newtheorem{construction}[theorem]{Construction}
\newtheorem{example}[theorem]{Example}
\newtheorem{question}[theorem]{Question}
\newtheorem{openquestion}[theorem]{Open Question}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{protocol}[theorem]{Protocol}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{exercise-easy}[theorem]{Exercise}
\newtheorem{exercise-med}[theorem]{Exercise}
\newtheorem{exercise-hard}[theorem]{Exercise$^\star$}
\newtheorem{claim}[theorem]{Claim}
\newtheorem*{claim*}{Claim}
\newtheorem{remark}[theorem]{Remark}
\newtheorem*{remark*}{Remark}
\newtheorem{observation}[theorem]{Observation}
\newtheorem*{observation*}{Observation}
% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
% \topmargin 0pt
% \advance \topmargin by -\headheight
% \advance \topmargin by -\headsep
% \textheight 8.9in
% \oddsidemargin 0pt
% \evensidemargin \oddsidemargin
% \marginparwidth 0.5in
% \textwidth 6.5in
% \parindent 0in
% \parskip 1.5ex
\begin{document}
\lecture{22 --- April 6, 2023}{Spring 2023}{Prof.\ Jelani Nelson}{Sudhanva Kulkarni}
\section{Overview}
In the last lecture we looked at simplex for a given linear program
\begin{align*}
\min c^Tx
\\
Ax = b
\\
x \ge 0
\\
A \in \mathbb{R}^{m \times n}
\end{align*}
Recall that a vertex is determined by a basis $B \subseteq [n]$ with $|B| = m$. Also $v_B = A_B^{-1}b$.
\newline
In this lecture we cover
\begin{enumerate}
\item Strong Duality
\item Complementary Slackness
\item Ellipsoid algorithm
\item Interior point methods (didn't quite get to this)
\end{enumerate}
\section{Simplex and Strong Duality}
\begin{theorem}[Strong Duality] \label{SD}
If the primal is bounded and feasible, then its dual is also bounded and feasible. Further, they have the same optimal value.
\end{theorem}
We'll prove the above by looking at the termination condition for simplex. Recall from last lecture that if we fix a basis $B$, we may rewrite our linear program as follows
\begin{align*}
\min c_B^Tx_B + c_N^Tx_N
\\
A_Bx_B + A_Nx_N = b
\\
x_N,x_B \ge 0
\end{align*}
Where $x_B$ is just all components with index belonging to $B$ and $x_N$ is the rest. Then, using the equality constraint, we may write
\begin{align*}
x_B = A_B^{-1}b - A_B^{-1}A_Nx_N
\end{align*}
and thus rewrite our objective as
\begin{align*}
(c_N - A_N^T(A_B^{-1})^Tc_B)^Tx_N = \tilde{c}_N^Tx_N
\end{align*}
Simplex terminates when $\tilde{c}_N \ge 0$.
\subsection{Proof of Strong Duality}
Now, to prove strong duality, say we run simplex and achieve $\tilde{c}_N \ge 0$ with a basis $B$. Note that $x_B = A^{-1}_Bb$. Now, for strong duality, we'd want $b^Ty = c^T_Bx_B$ where $y$ is dual feasible. Take $y = (A_B^{-1})^Tc_B$; then $b^Ty = y^Tb = (c_B)^TA_B^{-1}b = c_B^Tx_B$ as desired. Before we check that $y$ is dual feasible let's write down the dual. We get
\begin{align*}
\max b^Ty
\\ A^Ty \le c
\end{align*}
Consider the slack vector $s = c - A^Ty$. Then
\begin{align*}
s_B = c_B - A_B^T(A_B^{-1})^Tc_B = c_B - A_B^T(A_B^T)^{-1}c_B = 0
\end{align*}
and
\begin{align*}
s_N = c_N - A_N^T(A_B^{-1})^Tc_B = \tilde{c}_N
\end{align*}
Since simplex terminated, $\tilde{c}_N \ge 0 \implies s \ge 0$ and so $y$ is feasible. Hence we have strong duality.
\subsection{Runtime of Simplex}
Since simplex essentially looks at each vertex of the constructed polytope, we can try to get a bound on its runtime by looking at the maximum distance we'd have to travel in said polytope. To do this, construct a graph corresponding to the polytope in the obvious way. Hirsch conjectured in '57 that the diameter of such a graph is bounded by $m - n$. This conjecture was disproved in 2011 by Santos \cite{Santos_2012}, who showed that $\exists P \subseteq \mathbb{R}^{43}$ which is an intersection of 86 half spaces but whose diameter is $\ge 44$.
\newline
While this was certainly disappointing, simplex still worked well in most practical cases, especially with appropriate pivoting. In 2003, Spielman and Teng \cite{spielman2003smoothed} showed that there exists a pivoting rule $P$ such that
$\forall I = (A,b,c)$, we have that $\E[\mathrm{runtime}(\mathrm{Perturb}(I))]$ is polynomial. Here $\mathrm{Perturb}(I)$ means that we add some small amount of noise to each of the variables in our program (say Gaussian with mean 0 and extremely small variance).
\section{Complementary Slackness}
\begin{theorem}[Complementary Slackness] \label{CS}
Say the primal and dual are both bounded and feasible with optimal solutions $x,y$ respectively. Then, if $s = c - A^Ty$, we have $\forall i \in [n]$
\begin{itemize}
\item $x_i > 0 \implies s_i = 0$
\item $s_i > 0 \implies x_i = 0$
\end{itemize}
\end{theorem}
\subsection{Proof}
The proof of the above uses Strong Duality. By Theorem \ref{SD} we have
\begin{equation} \label{1}
c^Tx - b^Ty= 0
\end{equation}
Now let's consider $x \cdot s = x^Ts$. We have
\begin{align*}
x^Ts = x^T(c - A^Ty) = c^Tx - (Ax)^Ty = c^Tx - b^Ty
\end{align*}
So, we get
\begin{equation} \label{2}
\sum_i x_is_i = c^Tx - b^Ty
\end{equation}
From \eqref{1} and \eqref{2} we get $\sum_i x_i s_i = 0$. But since $x \ge 0$ and $s \ge 0$, the only way the sum can be 0 is if each summand is 0. So in each summand $x_is_i$, at least one factor must be 0.
\section{Ellipsoid Algorithm}
Ellipsoid was invented by Khachiyan in 1979 \cite{khachiyan1979polynomial}, 32 years after simplex. It is (weakly) polynomial in $m,n,l$, which represent the number of constraints, the number of variables, and the bit complexity respectively. The only requirement for ellipsoid to work is a sub-routine with the following specification: given a point as input, it either returns one constraint that the point violates, or it tells us that the point is feasible.
Note that ellipsoid doesn't actually solve for the optimum. It only gives us a feasible point (but we'll see how to get around this in a very straightforward way).
\subsection{Geometric intuition for algorithm}
Given any symmetric PSD matrix $P$, we describe an ellipsoid with centre $\alpha$ by
\begin{align*}
(x - \alpha)^TP(x - \alpha) \le 1
\end{align*}
Or equivalently \begin{align*}
\lVert B(x - \alpha)\rVert_2^2 \le 1
\end{align*}
where $P = B^TB$. The unit sphere with centre at the origin is seen as a special case of this by just letting $P = I$, $\alpha = 0$. \newline
A polytope, as seen before, is described by a system of inequalities $Ax \le b$.
\newline
The idea behind ellipsoid is to keep finding ellipsoids that cover our polytope, use the ``checking'' subroutine on their centres, and then lower our volume of search accordingly.
\begin{center}
\includegraphics[width=0.5\linewidth]{ellipse}
\end{center}
\subsection{The algorithm}
\begin{algorithm}
Ellipsoid:
\\
\begin{itemize}
\item Set $E_0$ to be some extremely large sphere containing our polytope $P$
\item for $i = 0,1, \ldots, k$
\begin{enumerate}
\item Run the constraint checking subroutine on the centre of $E_i$
\item If the point is feasible, return
\item Else let $E_{i + 1}$ be the smallest ellipsoid containing the intersection of the half-space returned in step 1 and $E_i$
\end{enumerate}
\end{itemize}
\end{algorithm}
Thankfully, the step of constructing the new ellipsoid is a closed-form problem that can be solved easily; unfortunately, we don't show the solution here.
\newline
Showing that this algorithm works relies on the fact that $\mathrm{vol}(E_{i+1})/\mathrm{vol}(E_i) \le \exp\left(-\frac{1}{2(n+1)}\right)$ (note the negative exponent: the volume shrinks at every step). Then, since our first ellipsoid is an $n$-dimensional sphere of radius $R$, we have
\begin{align*}
\mathrm{vol}(E_0) = \frac{\pi^{n/2}}{\Gamma(\frac{n}{2} + 1)}R^n
\end{align*}
So after $k$ iterations, we're bounded by something on the order of $(nR)^n \cdot \exp\left(-\frac{k}{2(n+1)}\right)$. Picking $k$ appropriately large, we see that the algorithm terminates.
\subsection{Modifications to Ellipsoid}
As we just saw, Ellipsoid only returns a feasible point, not the optimal one. However, we can change this by simply solving the primal and dual at the same time! Then any feasible solution is optimal. So, we input the constraints
\begin{align*}
Ax = b
\\
A^Ty \le c
\\
x \ge 0
\\
c^Tx = b^Ty
\end{align*}
Another issue one may run into is the polytope being "too thin", so its volume would be extremely low and so we'd need extremely large $k$ for this process to terminate. We can solve this by using our algorithm on a newly constructed polytope $P'$ which is given by
\begin{align*}
P' = \{ (x,z) : \; & Ax \le b + z \cdot \mathbf{1}, \\
& -2^l \le x_i \le 2^l \quad \forall i, \\
& -2^l \le z \le 2^l \,\}
\end{align*}
This beefs up our polytope and makes convergence more realistic.
\section{Next Lecture}
In the next lecture, we'll go over interior point methods for optimization problems.
\begin{thebibliography}{9}
\bibitem{spielman2003smoothed}
Daniel A. Spielman and Shang-Hua Teng, (2003) Smoothed Analysis of Algorithms: Why the Simplex Algorithm Usually Takes Polynomial Time
\bibitem{Santos_2012}
Francisco Santos (2012) A counterexample to the Hirsch Conjecture, Annals of Mathematics
\bibitem{khachiyan1979polynomial}
Khachiyan, Leonid Genrikhovich (1979) A polynomial algorithm in linear programming, \textit{Doklady Akademii Nauk}
\end{thebibliography}
\end{document}