\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{fullpage}
\usepackage[capitalise,nameinlink]{cleveref}
\crefname{lemma}{Lemma}{Lemmas}
\crefname{fact}{Fact}{Facts}
\crefname{theorem}{Theorem}{Theorems}
\crefname{corollary}{Corollary}{Corollaries}
\crefname{claim}{Claim}{Claims}
\crefname{example}{Example}{Examples}
\crefname{problem}{Problem}{Problems}
\crefname{setting}{Setting}{Settings}
\crefname{definition}{Definition}{Definitions}
\crefname{assumption}{Assumption}{Assumptions}
\crefname{subsection}{Subsection}{Subsections}
\crefname{section}{Section}{Sections}
\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\newcommand{\eps}{\varepsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{
\hbox to 5.78in { {\bf CS 270: Combinatorial Algorithms and Data Structures
} \hfill #2 }
\vspace{4mm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
\hbox to 5.78in { {\em #3 \hfill #4} }
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture #1}}
\newtheorem{theorem}{Theorem}[section]
\newtheorem*{theorem*}{Theorem}
\newtheorem{itheorem}{Theorem}
\newtheorem{subclaim}{Claim}[theorem]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem*{proposition*}{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem*{lemma*}{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem*{conjecture*}{Conjecture}
\newtheorem{fact}[theorem]{Fact}
\newtheorem*{fact*}{Fact}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem*{exercise*}{Exercise}
\newtheorem{hypothesis}[theorem]{Hypothesis}
\newtheorem*{hypothesis*}{Hypothesis}
\newtheorem{conjecture}[theorem]{Conjecture}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{setting}[theorem]{Setting}
\newtheorem{construction}[theorem]{Construction}
\newtheorem{example}[theorem]{Example}
\newtheorem{question}[theorem]{Question}
\newtheorem{openquestion}[theorem]{Open Question}
% \newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{protocol}[theorem]{Protocol}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{exercise-easy}[theorem]{Exercise}
\newtheorem{exercise-med}[theorem]{Exercise}
\newtheorem{exercise-hard}[theorem]{Exercise$^\star$}
\newtheorem{claim}[theorem]{Claim}
\newtheorem*{claim*}{Claim}
\newtheorem{remark}[theorem]{Remark}
\newtheorem*{remark*}{Remark}
\newtheorem{observation}[theorem]{Observation}
\newtheorem*{observation*}{Observation}
% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
% \topmargin 0pt
% \advance \topmargin by -\headheight
% \advance \topmargin by -\headsep
% \textheight 8.9in
% \oddsidemargin 0pt
% \evensidemargin \oddsidemargin
% \marginparwidth 0.5in
% \textwidth 6.5in
\parindent 0in
% \parskip 1.5ex
\usepackage{graphicx}
\graphicspath{ {./images/} }
\usepackage[font=small,labelfont=bf]{caption}
\begin{document}
\lecture{16 --- March 9, 2023}{Spring 2023}{Prof.\ Jelani Nelson}{Brandon Tran, Laryn Qi}
\section{Outline}
\begin{enumerate}
\item $k$-server + weighted paging
\item LP Duality recap
\item Online primal-dual
\end{enumerate}
\textbf{Project Proposal}
\begin{itemize}
\item Lightweight assignment
\item Purpose is primarily for Jelani to catch issues, give advice, provide readings, etc. earlier rather than later
\item Max 2 pages
\end{itemize}
\section{$k$-server problem}
The $k$-server problem can generalize many online problems. The problem is stated as follows: we have an $n$-point metric space. Each of the $k$ servers lies on a point in the metric space ($k < n$). The distance function $d$ of the metric space satisfies:
\begin{enumerate}
\item \textbf{Positivity}: $\forall x \neq y$: $d(x, y) > 0$.
\item \textbf{Symmetry}: $d(x, y) = d(y, x)$
\item \textbf{Triangle inequality}: $d(x, z) \leq d(x, y) + d(y, z)$
\end{enumerate}
\subsection{$k$-server as a general online problem}
\begin{enumerate}
\item \textbf{Paging} is a special case of k-server. Points $\Longleftrightarrow$ pages/memory addresses. $k := \text{size of cache}$. $n := \text{\# of memory addresses}$ (in reality finite, although we often think of it as infinite). $\forall x\neq y: d(x,y) = 1$.
\item \textbf{Weighted Paging}: In Weighted Paging, local (in cache) pages cost less than remote pages (e.g. must make a network call). Requesting a page is not always uniform cost. Each page $p$ has cost $c(p)$. $d(x,y) := \frac{1}{2}(c(x) + c(y))$. Initialize all k-servers to the same single NULL point. Every time a page moves to a new point, pay half its cost. Pay the second half when it leaves that point. At the end, move everybody back to NULL to account for remaining costs.
\begin{itemize}
\item (maybe) pset5 Q3: Bansal, Buchbinder, Naor showed $O(\lg k)$ competitive for weighted paging~\cite{BBN07}.
\end{itemize}
\end{enumerate}
\subsection{History of $k$-server}
\begin{enumerate}
\item $(2k-1)$-competitive ``work function'' algorithm (deterministic). Shown in [Koutsoupias, Papadimitriou '95]~\cite{KP95}.
\item Conjecture: $O(\lg k)$ randomized is possible. However, [Bubeck, Coester, Rabani '22]~\cite{BCR22} (fresh off the press!) showed that you cannot beat $\Omega(\lg^2 k)$.
\item Bubeck et al.\ found $O(\lg^2 k \lg n)$~\cite{B18}.
\end{enumerate}
\section{Recap of Linear Programming and Duality}
\subsection{Linear Programs}
In linear programming, we are given a matrix $A \in \R^{m \times n}$, vectors $b \in \R^m$, $c \in \R^n$, and we wish to find an $x \in \R^n$ that solves the following optimization problem:
\begin{align*}
\min_{x \in \R^n} \quad & c^T x\\
\textrm{s.t.} \quad & Ax \geq b\\
& x \geq 0 \\
\end{align*}
where the inequalities are taken entrywise. This is called the \textbf{primal} linear program. We have a set of $m$ linear constraints, i.e. $A_1^T x \geq b_1$, $A_2^T x \geq b_2$, ... where $A_i$ is the $i$th row of $A$. These constraints define a set of half-spaces. The intersection of these half-spaces is always going to be some kind of polytope, with possibly unbounded sides -- we call this the feasible region. $c$ can be thought of as ``the direction of gravity''. Drop a marble in the feasible region, and it follows the direction of $c$. No matter what, it will eventually end up in a corner.
\begin{center}
\includegraphics[scale=0.33]{primal-optimization.jpg}
\captionof{figure}{Visualization of an example primal optimization problem}
\end{center}
A few notes on the generality of this form of the primal problem:
\begin{itemize}
\item We can convert a $\geq$ constraint to a $\leq$ constraint by multiplying by $-1$.
\item An equality constraint is just a $\geq$ AND $\leq$ constraint. Convert the $\geq$ constraint to $\leq$ as described above.
\item If you don't want nonnegative constraints: any number $Z$ can be written as $Z = Z^+ - Z^- : Z^+, Z^- \geq 0$.
\item If you have maximization problem, negate $c$ to convert to minimization.
\end{itemize}
\subsection{LP Duality}
In the primal, we are trying to minimize $c^T x$ subject to $Ax \geq b$ and $x \geq 0$. We would like to provide a lower bound on $c^T x$. This is where duality comes in.\\
Consider our constraints $Ax \geq b$. For the $i$th constraint, come up with a new variable $y_i$ and multiply both sides by $y_i$: $y_i A_i^T x \geq b_i y_i$. We also require $y_i \geq 0$ so that the inequality is preserved. Then, add up all of the constraints. In vector form, this gives us
\begin{equation*}
y^TAx = (A^T y)^T x \geq b^T y
\end{equation*}
It would be great if $A^T y = c$, because this would mean $c^T x \geq b^T y$, and $c^T x$ is what we're trying to lower bound. So $b^T y$ would be a lower bound on the primal LP! It turns out, because $x \geq 0$, it is enough that $A^T y \leq c$, since if we're multiplying the entries of $A^T y$ by nonnegative numbers and adding them up, finding a lower bound on that value implies that $c^T x$ is also lower bounded. Mathematically, $(A^Ty)^T x \geq b^T y \implies c^Tx \geq b^T y$. Therefore, we define the \textbf{dual} linear program as
\begin{align*}
\max_{y \in \R^m} \quad & b^T y\\
\textrm{s.t.} \quad & A^T y \leq c\\
& y \geq 0 \\
\end{align*}
It is possible that the dual is infeasible.
\begin{theorem}[Weak Duality] \label{thm:WD}
$\forall x, y$ feasible for primal/dual respectively. $\text{cost}_{primal}(x) \geq \text{cost}_{dual}(y)$. In particular, $OPT(primal) \geq OPT(Dual)$.
\end{theorem}
\begin{proof}
By construction of Primal/Dual.
\end{proof}
\begin{theorem}[Strong Duality] \label{thm:SD}
Primal bounded (bounded means doesn't go to infinity) and feasible (green region is not empty set) implies dual is as well and $OPT(primal) = OPT(Dual)$
\end{theorem}
\begin{proof}
In a future lecture.
\end{proof}
\section{Online Primal-Dual}
This section follows [Buchbinder, Naor '09]~\cite{BN09}.
$x \in \R^n$ starts off as $0$. We see constraints ($A_i$, $b_i$) come online (from primal) which must be satisfied at each time step. We are allowed to increment entries of $x$, i.e. the entries of $x$ are only allowed to increase monotonically.
\subsection{Approximating OPT}
\begin{theorem}[Approximate Complementary Slackness] \label{thm:ACS}
Suppose $x,y$ are feasible solutions to the primal and dual, respectively. The primal has $n$ variables, $m$ constraints, dual is opposite ($m$ variables, $n$ constraints). Variables become constraints and vice versa (e.g. for each primal constraint, we multiply it by a new $y_i$ dual variable).
\end{theorem}
\begin{enumerate}
\item $x_i > 0 \implies \frac{c_i}{\alpha} \leq (A^T y)_i \leq c_i$
(Second inequality is automatically implied by feasibility of $y$)
\item $y_i > 0 \implies \beta b_i \geq (Ax)_i \geq b_i$ (Second inequality is automatically implied by feasibility of $x$)
\item If $x, y$ satisfy 1. and 2., then $c^T x \leq \alpha \beta b^T y$.
\end{enumerate}
\begin{proof}
pset5, Q2
\end{proof}
\textbf{Significance}: In online algorithms, we aren't going to get a competitive ratio of 1, we can only approximately solve the LP. It turns out, approximate complementary slackness gives us a ``recipe'' for approximating the LP. Essentially what we do is as follows: set up primal and dual LPs, and as points come online, increment $x$ and $y$ such that we maintain feasible solutions to the primal and dual and such that they satisfy the conditions of approximate complementary slackness. At the end, the values of the two problems gives some guarantees about the quality of our solution to the primal with respect to OPT.
\begin{equation*}
b^T y \leq \mathrm{OPT}(\mathrm{Dual}) = \mathrm{OPT}(\mathrm{Primal}) \leq c^T x \leq \alpha \beta b^T y \leq \alpha\beta \cdot \mathrm{OPT}
\end{equation*}
which means
\begin{equation*}
c^T x \leq \alpha \beta \cdot \mathrm{OPT}
\end{equation*}
So if we maintain primal and dual feasible solutions that satisfy the conditions of approximate complementary slackness, it means that the $x$ we found is ``not too far'' from OPT.
\subsection{Ski rental}
Recall the ski rental problem from lecture 14. We formulate the primal LP for this problem as
\begin{align*}
\min_{x} \quad &Bx + \sum_{i=1}^n z_i\\
\textrm{s.t.} \quad & x+z_i \geq 1 \quad \forall i\\
& x, z \geq 0 \\
\end{align*}
$B$ is the cost of buying the skis, $x \in \{0, 1\}$ represents whether we bought the skis or not. $z_i \in \{0, 1\}$ represents whether or not we rented on day $i$. On each day, we have to have either bought or rented skis. Below are what the matrices look like for the primal.
\begin{gather*}
A =
\begin{bmatrix}
1 & 1 & 0 & \ldots & 0 \\ 1 & 0 & 1 & \ldots & 0 \\ \vdots & 0 & 0 & \ddots &\vdots \\ 1 & 0 & 0 & \ldots & 1
\end{bmatrix}
\begin{bmatrix}
x \\ z_1 \\ \vdots \\z _n
\end{bmatrix}\\
c = \begin{bmatrix}
B \\ 1 \\ \vdots \\ 1
\end{bmatrix}\\
b = \begin{bmatrix}
1 \\ 1 \\ \vdots \\ 1
\end{bmatrix}
\end{gather*}
The dual is
\begin{align*}
\max_{y} \quad &\sum y_i\\
\textrm{s.t.} \quad & \sum y_i \leq B\\
& y_i \leq 1 \quad \forall i\\
& y\geq 0 \\
\end{align*}
\textbf{Algorithm}:
Maintain primal+dual feasible solutions $x, z_i$'s $y_i$'s (initially all $0$). Seeing a new constraint is like seeing a new variable $y_i$. Gradually increase $y_i$ until one of the dual constraints $y_i$ is involved in ($y_i \leq 1, \sum y_i \leq B$) becomes tight. Set the corresponding primal variable to $1$.
In terms of approximate complementary slackness: if $x > 0$, then $x = 1$, so $\sum y_i = B$ and $\alpha = 1$; if $z_i > 0$, then $y_i = 1$ and $\alpha = 1$. So in either case $\alpha = 1$. Since $x + z_i \leq 2$ (sum of two $0$-$1$ variables), we have $\beta = 2$. These values of $\alpha$ and $\beta$ imply cost(algorithm solution) $\leq 2\, OPT(primal) \leq 2\, OPT(\text{ski rental})$. Note that our primal problem formulation is not exactly equivalent to ski rental since we're missing integrality constraints. However, adding more constraints to our primal problem could only increase the min, implying the second inequality: $OPT(\text{ski rental}) \geq OPT(primal)$.\\
In online primal-dual, it is possible (depending on the problem) to get a constraint which is impossible to satisfy. \\
\begin{claim}
With randomization, competitive ratio of roughly $\frac{e}{e-1}$ is possible.
\end{claim}
\textbf{Idea}: Will maintain fractional solutions online (i.e. not integer LP, we'll fix this later).
\textbf{Algorithm}: See new day $i$
\begin{itemize}
\item if $x\geq 1$, do nothing (i.e. $x+z_i \geq 1$ is already satisfied)
\item set $y_i \gets 1$
\item $z_i \gets 1-x$
\item $x \gets (1+\frac{1}{B})x + \frac{1}{cB}$, $c$ is to be determined.
\end{itemize}
\textbf{Recall}: $b^T y\leq OPT \leq c^T x$. If $c^T x \leq \gamma b^T y$, then $c^T x \leq \gamma OPT$. Want to upper bound $\frac{c^T x}{b^T y} \leq \gamma$. If we can find this bound, this means we have a $\gamma$ competitive solution.\\
\textbf{Primal solution feasible}: $y_i = 1 \geq 0$, $z_i \gets 1-x$, and $x$ becomes bigger.
\begin{itemize}
\item $z_i \geq 0$ since $x \leq 1$ and $z_i := 1 - x$.
\item $x \geq 0$ since $x$ starts at $0$ and can only increase.
\item $x+z_i \geq 1$ since $x+z_i = x+(1-x) = 1 \geq 1$.
\end{itemize}
\textbf{Dual solution feasible}: Need to show $x=1$ after $\leq B$ days. $\frac{c^T x}{b^T y}$. Look at $\frac{\Delta c^T x}{\Delta b^T y}$ each day. If these are bounded by $\gamma$, then the end result is also bounded by $\gamma$. If $x$ isn't already $1$: $\Delta \text{dual} = 1$. $\Delta \text{primal} = B \Delta x + 1-x = B ((1+\frac{1}{B})x + \frac{1}{cB} - x) + 1-x = Bx + x + \frac{1}{c} - Bx + 1 - x = 1 + \frac{1}{c}$. This means $\frac{\Delta \text{primal}}{\Delta \text{dual}} \leq 1 + \frac{1}{c}$. \\
Now we'll show that $x=1$ after $\leq B$ days. Let $r := \frac{1}{cB}$, $q := 1 + \frac{1}{B}$, and $x_i := $ value of $x$ at day $i$.
\begin{gather*}
x_{i+1} \gets (1+\frac{1}{B})x_i + \frac{1}{cB} = qx_i + r\text{;}\\
x_0 := 0 \\
x_1 = r\\
x_2 = qr + r\\
x_3 = q^2 r + qr + r\\
\ldots\\
x_k = r \sum_{i=0}^{k-1} q^i
\end{gather*}
\textbf{Want}: when $k=B$, $x_k=1$ (otherwise, this ruins dual feasibility) $\implies$ want $r \sum_{i=0}^{B-1} q^i = 1$.
\begin{gather*}
r \frac{q^B-1}{q-1} = \frac{1}{cB} \frac{(1+\frac{1}{B})^B - 1}{\frac{1}{B}}
\end{gather*}
To make this $1$, set $c := (1+\frac{1}{B})^B - 1$. $(1+\frac{1}{B})^B \approx e$, so $c \approx e - 1$, which means $\frac{\Delta \text{primal}}{\Delta \text{dual}} \leq 1 + \frac{1}{c} \approx 1 + \frac{1}{e-1} = \frac{e}{e-1}$. This proves the dual solution is feasible. Now, we just need to fix this fractional solution by making it integer. \\
On first day added $x_1$ to 0, then added $x_2$ to that, then $x_3$ to the result of that, etc. Pick $u \in [0, 1]$ unif. at random.
\begin{center}
\includegraphics[scale=0.33]{images/fractional-soln-fix.jpg}
\captionof{figure}{Visualization of fixing fractional solution}
\end{center}
Buy on the first day that $x$ exceeds $u$. The expected cost is $B \cdot \Pr(\text{buy}) + \sum_i \Pr(z_i = 1)$. $\Pr(\text{buy}) = \Pr(u \leq x) = x$ and $\Pr(z_i = 1) = z_i$, so $\E[\text{cost(primal)}] = Bx + \sum_i z_i = \text{cost(primal)}$, i.e.\ the expected cost of the rounded solution equals the cost of the fractional solution.
\newpage
\bibliographystyle{alpha}
\begin{thebibliography}{42}
% \bibitem{AlonMS99}
% Noga~Alon, Yossi~Matias, Mario~Szegedy.
% \newblock The Space Complexity of Approximating the Frequency Moments.
% \newblock {\em J. Comput. Syst. Sci.}, 58(1):137--147, 1999.
\bibitem{BBN07}
Nikhil Bansal, Niv Buchbinder, Joseph (Seffi) Naor.
\newblock A Primal-Dual Randomized Algorithm for Weighted Paging.
\newblock {\em Foundations of Computer Science} (FOCS), IEEE Annual Symposium on (2007)
\bibitem{KP95}
Elias Koutsoupias and Christos H Papadimitriou.
\newblock On the $k$-server conjecture.
\newblock {\em Journal of the ACM}, 42(5):971–983, 1995.
\bibitem{BCR22}
Sébastien Bubeck, Christian Coester, Yuval Rabani.
\newblock Shortest Paths without a Map, but with an Entropic Regularizer.
\newblock {\em arXiv}, 2022
\bibitem{B18}
Bubeck et al.
\newblock k-server via multiscale entropic regularization.
\newblock {\em arXiv}, 2017, STOC 2018
\bibitem{BN09}
Niv Buchbinder, Joseph (Seffi) Naor.
\newblock The Design of Competitive Online Algorithms via a Primal-Dual Approach.
\newblock {\em Foundations and Trends in Theoretical Computer Science}, Vol.3, Nos. 2-3, 2009
\end{thebibliography}
\end{document}