\documentclass[11pt]{article}
\usepackage{jeffe,handout,graphicx}
\newtheorem{theorem}{Theorem}
% ==========================================================
\begin{document}
\begin{center}
\LARGE
\textbf{Algorithms and Theoretical Computer Science}
\\
\textbf{Ph.D. Qualifying Examination}
\\
\textbf{Spring 2006}
\bigskip
\bigskip
%
%\begin{tabular}{|p{2.5in}|p{2.5in}|}
%\hline
%\multicolumn{2}{|l|}{Name:}
%\\\hline
%Net ID: & Alias:
%\\\hline
%\end{tabular}
%\vfil
\hrule
\normalsize
\begin{itemize}
\item
The exam consists of eight written questions, four in the morning and
four in the afternoon. You will have three hours for each group of four
questions. Please start your answers to each numbered question on a new
sheet of paper.
\item
All else being equal, it is better to solve some of the problems
completely than to get partial credit on every problem.
\end{itemize}
\hrule
%
%\vfil
%\includegraphics[height=4in]{dino-qual.pdf}
\vfil
\LARGE
\begin{tabular}{|c||c|c|c|c||c|c|c|c||c|}
\hline
\# & ~1~ & ~2~ & ~3~ & ~4~ & ~1~ & ~2~ & ~3~ & ~4~ & ~$\Sum$~ \\ \hline\hline
Score & & & & & & & & & \\ \hline
Grader & & & & & & & & & \\ \hline
\end{tabular}
\end{center}
% ----------------------------------------------------------------------
\headers{Algorithms and Theory Qual}{}{Spring 2006}
\section*{Algorithms and Data Structures}
\begin{enumerate}
\parindent2em
% ----------------------
\item % SH
Let $G$ be an undirected complete graph with edge weights that satisfy the triangle inequality. For any integer $r$, an \emph{$r$-tour} of $G$ is a simple cycle $\pi$ such that every vertex in $G$ is within distance $r$ of some vertex in $\pi$ (where distances are computed according to the edge weights).
\begin{itemize}
\item{[5 pts]}
Prove that for any fixed integer $r$, computing the shortest $r$-tour of a given graph is NP-Complete.
\item{[10 pts]}
Show that if we know the \emph{vertices} of the shortest $r$-tour, then we can compute an $r$-tour that is at most twice as long as the shortest $r$-tour.
\item{[35 pts]}
Describe a polynomial-time algorithm that outputs an $\alpha r$-tour of $G$, of length at most $\beta\cdot\textsc{Opt}_r$, where $\textsc{Opt}_r$ is the length of the optimal $r$-tour of $G$ and $\alpha$ and $\beta$ are constants. (You should try to make $\alpha$ and $\beta$ as small as possible, but any constant values are interesting.)
\end{itemize}
% ----------------------
\bigskip\vfil
\item % JE
Suppose you are given a set of $n$ axis-aligned ellipses in the plane, all centered at the origin. Describe and analyze an efficient algorithm to compute the union of these ellipses. The output should be a combinatorial description of the boundary of the union. Don't worry about the underlying algebraic details---assume that you can compute any useful function of a constant number of ellipses in constant time. (But please tell us what those useful functions are!)
\begin{center}
\includegraphics[width=5in]{ellipses}
\end{center}
%% ----------------------
%\bigskip
%\item % SH
%Let $L$ be a multiset of $m$ lines in the plane. The \emph{crossing distance} between two points $p$ and~$q$ in the plane is the number of lines in $L$ that intersect the closed segment $\overline{pq}$.
%\begin{enumerate}
%\item
%An \emph{arrangement vertex} is the intersection point of two lines in $L$. Prove, that for any point $p$ in the plane and any $\delta>8$, there are $\Omega(\delta^2)$ arrangement vertices within crossing distance $\delta$ of $p$.
%\item
%let $P$ be a set of $n$ points in the plane. Prove that there are two points $u,v \in P$ whose crossing distance (with respect to $L$) is at most $O(m /\sqrt{n})$.
%\item
%Prove that
%\[
% \prod_{i=1}^n \left(1 + \frac{1}{\sqrt{i}}\right) \leq \exp( c \sqrt{n})
%\]
%for some constant $c$.
%\item
%Consider the following algorithm for computing a spanning tree of $P$. Starting with $T=\varnothing$, repeat the following process until $P$ contains only one point:
%\begin{itemize}
%\item
%Find the pair of points $u,v\in P$ whose crossing distance (with respect to $L$) is minimized.
%\item
%Add the segment $\overline{uv}$ to $T$.
%\item
%Remove either $u$ or $v$ from $P$. (It doesn't matter which.)
%\item
%Duplicate the lines in $L$ that intersect $\overline{uv}$ and add them back to $L$. (Alternatively, just double the weight of all these lines.)
%\end{itemize}
%Prove that if $m = O(n^2)$, then no line of $L$ crosses more than $O( \sqrt{n} )$ edges of the spanning tree $T$ computed by this algorithm.
%\end{enumerate}
% ----------------------
\bigskip\vfil
\item % SH
Let $\pi(n)$ denote the number of prime numbers smaller than $n$.
\begin{enumerate}
\item
Show that the product of all primes $p$ with $m < p \leq 2m$ is at most $\binom{2m}{m}$.
\item
Using part (a), prove that $\pi(n) =O(n /\log n)$.
\item
Let $p$ be a prime number, and let $m$ and $k$ be natural numbers. Prove that if $p^k$ divides $\binom{2m}{m}$ then $p^k \leq 2m$.
\item
Using part (c), prove that $\pi(n) = \Omega( n / \log n)$.
\end{enumerate}
% ----------------------
\newpage
\item % JE
\begin{enumerate}
\item{[10 pts]}
In a \emph{rooted ordered binary tree}, each node has a right child, a left child, both, or neither. Sketch an efficient algorithm to compute, given two rooted ordered binary trees $A$ and~$B$, the smallest rooted ordered binary tree that contains both $A$ and $B$ as rooted ordered subtrees.
\item{[10 pts]}
In a \emph{rooted binary tree}, each node has zero, one, or two children. Sketch an efficient algorithm to compute, given two rooted binary trees $A$ and~$B$, the smallest rooted binary tree that contains both $A$ and $B$ as rooted subtrees.
\item{[20 pts]}
In a \emph{rooted tree}, each node has zero or more children. Sketch an efficient algorithm to compute, given two rooted trees $A$ and $B$, the smallest rooted tree that contains both $A$ and $B$ as rooted subtrees.
\item{[10 pts]}
A \emph{free tree} is a connected acyclic undirected graph. Sketch an efficient algorithm to compute, given two free trees $A$ and $B$, the smallest free tree that contains both $A$ and~$B$ as subtrees.
\end{enumerate}
\noindent
In each problem, `smallest' means `with the fewest nodes'. In each part, you may use the preceding parts as subroutines. You don't need to describe each algorithm in complete detail; just give a high-level overview and a convincing argument that its running time is polynomial.
%
%% ----------------------
%\item %MP
%\newcommand{\col}[1]{\ensuremath{{\mathrm{col}}(#1)}}
%\newcommand{\minent}[1]{\ensuremath{{\mathrm H}_\infty(#1)}}
%\newcommand{\from}[1]{\ensuremath{\stackrel{#1}{\leftarrow}}}
%\newcommand{\sdiff}[2]{\ensuremath{\Delta(#1,#2)}}
%\newcommand{\hrep}{\ensuremath{\langle h \rangle}}
%\newcommand{\defeq}{\ensuremath{\mathrel{\mathop :}=}}
%\newcommand{\pr}{\ensuremath{\mathrm{Pr}}}
%\def\X{\mathcal{X}}
%In this problem we shall see how a ``pair-wise independent'' hash family can
%be used to ``smooth out'' the randomness in an a probability distribution.
%Roughly, we shall prove the following: If a random hash function from the
%hash family is applied to an input drawn from a distribution which is not
%close to uniform, but has some amount of randomness, then the output
%distribution will be close to uniform and contains almost all the randomness
%used in the process.
%First we define a few standard notions about probability distributions and prove a few useful relations.
%\begin{itemize}
%\item
%A \emph{probability distribution} over a set $\X$ is a function $P:\X\to[0,1]$ such that
%\[
% \sum_{x\in \X} P(x) = 1.
%\]
%\item
%The \emph{collision probability} of a distribution $P$ over $\X$ is
%\[
% \col{P}
% \defeq \pr_{x\from{P}\X, y\from{P}\X}[x=y]
% = \sum_{x\in \X} (P(x))^2.
%\]
%\item
%The \emph{min-entropy} of $P$ is $\minent{P} \defeq -\log_2\left(\max_{x\in \X} P(x)\right)$.
%\item
%The \emph{statistical difference} between two distributions $P$ and $Q$
%(both over $\X$) is
%\[
% \sdiff P Q \defeq \frac12 \sum_{x\in\X} \abs{P(x)-Q(x)}.
%\]
%\end{itemize}
%\begin{enumerate}
%\parindent 1.5em
%\item
%\textbf{Prove that} $\col P \le 2^{-\minent P}$ for any probability distribution $P$.
%\medskip
%\item
%Let $U_m$ denote the uniform distribution over the set $\set{0,1}^m$.
%\textbf{Prove that} for any probability distribution $P$ over $\set{0,1}^m$,
%\[
% \sdiff P {U_m} \le \frac12 \sqrt{2^m \col P - 1}.
%\]
%\Hint{Use the Cauchy-Schwarz inequality to relate $\sum_x \abs{a_x}$ and
%$\sqrt{\sum_x a_x^2}$.}
%\medskip
%\item
%Now consider hash functions from $n$ bits to $\ell$ bits. A collection $\X$ of $D$ such hash functions is said to be a \emph{pairwise independent family} if for every possible pair of \emph{distinct} inputs $x_1,x_2 \in \set{0,1}^n$ and every possible pair of outputs $y_1,y_2\in\set{0,1}^\ell$, the number of hash functions $h\in\X$ such that $h(x_1)=y_1$ and $h(x_2)=y_2$ is exactly $D/\ell^2$.
%Suppose we are given a pairwise independent family $\X$ of $D=2^d$ hash functions. Any hash function in $\X$ can be specified by $d$ bits; let $\hrep$ denote the $d$-bit representation of $h$. Define $f_h:\set{0,1}^n\times\set{0,1}^d\to\set{0,1}^{d+\ell}$ as $f(x,\hrep)=(\hrep,h(x))$.
%Consider a probability distribution $P$ over $\set{0,1}^n$. Let $Z=f(P,U_d)$. That is, let $Z$ be the distribution over $\set{0,1}^m$ obtained as output of $f$, when the input is drawn from $P\times U_d$. Thus $Z(\hrep,y) = 2^{-d}\sum_{x: h(x)=y} P(x)$.
%\textbf{Prove that} $\col Z = 2^{-d} \left(\col P + (1-\col P)2^{-\ell}\right)$.
%\end{enumerate}
%These three claims imply that if $P$ is a distribution over $\set{0,1}^n$ with min-entropy at least $\ell + 2\log\frac1{\epsilon}$, then $\sdiff {f(P,U_d)}{U_m} < \epsilon/2$.
%
%\paragraph{Solution.}
%\newcommand{\expect}{\ensuremath{\mathbf{E}}}
%\begin{enumerate}
%\item
%$\sum_x (P(x))^2 \le (\max_x P(x))(\sum_x P(x)) = \max_x P(x)$.
%\item
%\begin{align*}
% \sdiff P {U_m}
% &\defeq
% \frac12 \sum_x \abs{P(x) - 2^{-m}}
% &\text{[definition of $\Delta$]}
%\\ &\le
% \frac12 \sqrt{2^m \left(\sum_x (P(x) - 2^{-m})^2\right)}
% &\text{[Cauchy-Schwarz $\ne$]}
%\\ &=
% \frac12 \sqrt{2^m \left(\sum_x (P(x))^2 - 2^{-m}\right)}
% &\text{[algebra]}
%\\ &=
% \frac12 \sqrt{2^m \col Z - 1}
% &\text{[definition of col]}
%\end{align*}
%\item
%\begin{align*}
% \col Z
% &=
% \col{U_d}\cdot \expect_{\hrep\from{U_d}\set{0,1}^d}
% \left[ \pr_{x,y\from{P}\set{0,1}^n}[h(x)=h(y)] \right]
%\\ & =
% \col{U_d}\cdot \expect_{\hrep\from{U_d}\set{0,1}^d}
% \left[ \col P +
% (1-\col P)\,\pr_{x,y\from{P}\set{0,1}^n}[h(x)=h(y)\mid x\ne y]
% \right]
%\\ & = 2^{-d}
% \left(\col P +
% (1-\col P)\,
% \expect_{\hrep\from{U_d}\{0,1\}^d}
% \left[\pr_{x,y\from{P}\{0,1\}^n}[h(x)=h(y)\mid x\ne y]\right]
% \right)
%\\ & =
% 2^{-d} (\col P + (1-\col P)2^{-\ell})
%\end{align*}
%\end{enumerate}
\end{enumerate}
% =====================
\newpage
\section*{Formal Languages and Complexity Theory}
\begin{enumerate}
%
%% ----------------------
%\item % LP
%(a) Prove that logspace reductions are closed under composition.
%(b) Use this to prove that if $A$ is $P$-Complete, and $A$ is in $L$ (= \textsc{Logspace}), then $L = P$.
% ----------------------
\bigskip
\item % MV
Suppose $L \in \mathsf{DSPACE}(o(\log\log n))$. Prove that there is an integer $n_0$ such that $L$ can in fact be recognized in $\mathsf{DSPACE}(\log\log n_0)$; in other words, show that $L$ can be recognized in deterministic \emph{constant} space.
\hidesolutions
\begin{solution}
Consider $L$ recognized by $M$ in $o(\log\log n)$
space. We know that $M$ has $2^{o(\log\log n)}$ configurations, and
therefore runs in $o(\log n)$ time. For a position $i$ on the input
tape, the \emph{crossing sequence} at $i$ is the sequence
$aC_0C_1\ldots C_k$ where $a$ is symbol at position $i$, and $C_j$ is
the configuration of $M$ when it reads $a$ for the $j$th time.
Observe that the number of crossing sequences is at most
$(2^{o(\log\log n)})^{(2^{o(\log \log n)})} = 2^{2^{o(\log\log n)}} =
o(n)$. Thus there is an integer $n_0$, such that for all $n \geq n_0$,
the number of crossing sequences is less than $n/3$. We claim that
$L \in \mathsf{DPSACE}(\log\log n_0) = \mathsf{DSPACE}(1)$.
Suppose $x$ is the shortest string longer than $n_0$ on which the
space requirement is more than $\log\log n_0$. Since $\abs{x} > n_0$, we
know that $x$ can be written as $\alpha a\beta a\gamma a\delta$ such that
the $a$'s in the decomposition have the same crossing sequences.
Now, from the definition of crossing sequences, $\alpha a\gamma a
\delta$ and $\alpha a\beta a\delta$ have the same crossing sequences
in $\alpha$, $\beta$, $\gamma$, and $\delta$. Thus, at least one of them
has the same space requirements as $x$ (depending on whether
the maximum space is needed in $\alpha$, $\beta$, $\gamma$, or $\delta$).
So we have a shorter string that requires more than $\log\log n_0$
space, which contradicts minimality at $x$.
\end{solution}
% ----------------------
\bigskip
\item % LP
Tree automata are a generalization of finite state automata, where the inputs are labeled binary trees instead of strings. More precisely, the input is a finite, oriented binary tree where every node is either a \emph{leaf} with no children, or an \emph{internal node} with a left child, a right child, and a \emph{label} from some finite alphabet $\Sigma$.
Formally, a \emph{tree automaton} is a $5$-tuple $(Q, \Sigma, \delta, q_0, F)$, where just as for standard automata, $Q$ is a finite set of states, $F \subseteq Q$ is the set of accepting states, $q_0$ is the initial state, and $\Sigma$ is the input alphabet (in this case, the legal labels of interior nodes of input trees). Finally, $\delta: Q \times Q \times \Sigma \rightarrow Q$ is the transition function, which associates a state with each node of the tree inductively as follows.
\begin{itemize}
\item
The state associated with each leaf is $q_0$.
\item
The state associated with any internal node $v$ is $\delta(q_L, q_R, a)$, where $q_L$ is the state associated with $v$'s left child, $q_R$ is the state associated with $v$'s right child, and $a$ is the label of $v$.
\end{itemize}
A tree is \emph{accepted} if the state associated with the root is an accepting state.
\begin{enumerate}
\item
State and prove a ``pumping lemma" for tree automata that generalizes the standard pumping lemma for finite state automata.
\item
Using your pumping lemma as part of the proof, exhibit a family of trees that is not accepted by any tree automaton.
\end{enumerate}
% ----------------------
%\bigskip
%\item % MV
%Recall that a \emph{deterministic context free language} (DCFL) is a language $L$ that can be recognized by a deterministic pushdown automata by final state. Show that the language $L = \set{a^mb^nc^k \mid \text{either $m \neq n$ or $n \neq k$}}$ is not a DCFL.
%
%{\bf Solution:} Observe that the class of DCFLs (unlike CFLs) is
%closed under complementation. Thus, if $L$ is DCFL then $\bar{L}$ is
%also DCFL. Consider $L' = \bar{L} \cap a^*b^*c^*$; since $L'$ is an
%intersection of a CFL and a regular language $L'$ is a CFL. But $L' =
%\{a^nb^nc^n\: |\: n \geq 0\}$ which can easily shown to be not a CFL
%(by a pumping lemma argument), and hence we get a contradiction. $L$
%is not a DCFL.
% ----------------------
\bigskip
\item % MV
Recall that a \emph{linear context-free grammar} is a context-free grammar where the right-hand side of every production has at most one non-terminal. A language $L$ is said to be a \emph{linear context-free language} if there is a linear CFG $G$ such that $L = L(G)$. Show that for any linear CFL $L$ and any regular language $R$, the language $L \cap R$ is a linear CFL.
% {\bf Solution:} Let $G = (V, \Sigma, P, S)$ be the linear grammar
% generating $L$ and let $D = (Q, \Sigma, q_0, \delta, F)$ be the DFA
% reconizing $R$. Construct a grammar $G'$ for $L \cap R$ whose
% variables are of the form $[pAq]$, where $A \in V$ and $p,q \in Q$.
% The intuition is that $[pAq]$ will generate strings $w$ iff $w$ is
% generated by $A$, and the DFA $D$ goes from $p$ to $q$ on $w$.
% Defining the rules for grammar $G'$ is straightforward based on this
% intuition.
% ----------------------
\bigskip
\item % LP
The \emph{leaf complexity} of a Boolean function $f:\set{0,1}^n\to\set{0,1}$, denoted
$LC(f)$, is defined as the fewest number of leaves in any formula over the gate set $\{\lor, \land, \lnot\}$ that computes $f$.
\begin{enumerate}
\item
Let $\oplus_n$ denote the parity function on $n$ input bits. Show that $LC(\oplus_n) \leq n^2$. You may assume that $n$ is a power of two.
\item
A \emph{formal complexity measure} is a function $FC : \left(\set{0,1}^n\to\set{0,1}\right)\to\Natural$ that maps $n$-ary Boolean functions to natural numbers and that has the following properties:
\begin{itemize}
\item \emph{Atomicity:} $FC(x_i)$ = 1 for any index $i$.
\item \emph{Symmetry:} $FC(f) = FC(\neg f)$ for any function $f$.
\item \emph{Subadditivity:} $FC(f \lor g) \leq FC(f) + FC(g)$ for any functions $f$ and $g$.
\end{itemize}
Prove that $FC(f) \leq LC(f)$ for any formal complexity measure $FC$ and any $n$-ary Boolean function $f$.
\end{enumerate}
\end{enumerate}
\end{document}