\documentclass[11pt]{article}
\usepackage{jeffe,handout,graphicx}
\newtheorem{theorem}{Theorem}
% ==========================================================
\begin{document}
\begin{center}
\LARGE
\textbf{Algorithms and Theoretical Computer Science}
\\
\textbf{Ph.D. Qualifying Examination}
\\
\textbf{Fall 2005}
\bigskip
\bigskip
%
%\begin{tabular}{|p{2.5in}|p{2.5in}|}
%\hline
%\multicolumn{2}{|l|}{Name:}
%\\\hline
%Net ID: & Alias:
%\\\hline
%\end{tabular}
%\vfil
\hrule
\normalsize
\begin{itemize}
\item
The exam consists of eight written questions, four in the morning and
four in the afternoon. You will have three hours for each group of four
questions. Please start your answers to each numbered question on a new
sheet of paper.
\item
All else being equal, it is better to solve some of the problems
completely than to get partial credit on every problem.
\end{itemize}
\hrule
\vfil
\includegraphics[height=4in]{dino-qual.pdf}
\vfil
\LARGE
\begin{tabular}{|c||c|c|c|c||c|c|c|c||c|}
\hline
\# & ~1~ & ~2~ & ~3~ & ~4~ & ~5~ & ~6~ & ~7~ & ~8~ & ~$\Sum$~ \\ \hline\hline
Score & & & & & & & & & \\ \hline
Grader & & & & & & & & & \\ \hline
\end{tabular}
\end{center}
% ----------------------------------------------------------------------
\headers{Algorithms and Theory Qual}{}{Fall 2005}
\begin{enumerate}
\parindent2em
\bigskip
\item % \textsf{[SH,alg]}
\begin{enumerate}
\item
Prove that sorting $n$ numbers requires $\Omega(n \log n)$ time
in the worst case, if only comparisons are allowed.
\item
Prove that given $n$ numbers, deciding whether any two of them are equal
requires $\Omega(n \log n)$ time in the worst case, if only comparisons are allowed.
\end{enumerate}
\bigskip
\item% \textsf{[SH,alg/prb]}
A \emph{prefix code} is a set of $m$ bit strings $\set{b_1, \dots, b_m}\in \set{0,1}^*$ where no string $b_i$ is a prefix of another string $b_j$. Each string $b_i$ is called the \emph{code word} for symbol $i$.
\begin{enumerate}
% \item Prove that the function $\ln (1/x)$ is convex for $x \geq 0$.
\item
Let $\ell_i$ denote the number of bits in the $i$th code word $b_i$. Prove that every prefix code satisfies the \emph{Kraft inequality}:
\[
\sum_{i} 2^{-\ell_i} \leq 1.
\]
\item
A \emph{probability distribution} is a vector $p = (p_1, p_2, \dots, p_m) \in [0,1]^m$ such that $\sum_i p_i = 1$. What is the expected length of a random code word if each symbol $i$ is chosen with probability $p_i$?
\item
The \emph{entropy} $H(p)$ of a probability distribution $p$ is the quantity $\sum_i p_i \lg (1/p_i)$. Given the probability distribution $p$, show how to compute a prefix code such that the expected length of a random code word is at most $H(p) +1$. How fast is your algorithm?
(In fact, the entropy is maximized for the uniform distribution, and entropy is in fact a tight bound on the best prefix code one can generate.)
% \item Prove that $a \ln (1/a) + b \ln(1/b) \geq \ln 2$,
\end{enumerate}
\bigskip
\item% \textsf{[JE,alg]}
Let $M = (m_{ij})$ be an $n\times n$ matrix in which every row and every column is sorted. Such an array is called \emph{totally monotone}. Assume further that no two elements of $M$ are equal.
\begin{enumerate}
\item
Describe an algorithm to solve the following problem in $O(n)$ time: Given indices $i,j,k,l$ as input, compute the number of elements of $M$ smaller than $m_{ij}$ and larger than $m_{kl}$.
\item
Describe an algorithm to solve the following problem in $O(n)$ time: Given indices $i,j,k,l$ as input, return an element of $M$ chosen uniformly at random from the elements smaller than $m_{ij}$ and larger than $m_{kl}$. Assume the required range is non-empty.
\item
Describe a randomized algorithm to compute the median element of $M$ in $O(n\log n)$ expected time.
\end{enumerate}
\bigskip
\item %\textsf{[MV,aut]}
For any languages $A,B \subseteq \Sigma^*$, define
\[
A/B = \set{x \mid xy \in A \text{ for some } y\in B}
\]
Show that every recursively enumerable language is equal to $A/B$ for some context-free languages $A$ and $B$.
%
%\textbf{Solution:} Let $M$ be the TM such that $L = L(M)$. Consider languages
%\begin{align*}
% \textsc{Odd} &=
% \set{x\# w_1^R\# w_2\# w_3^R\# \cdots \#w_n\# \mid
% w_1 \text{is initial ID of $M$ on input $x$ and }
% w_{i-1} \vdash w_i \text{ for all odd } i}
%\\
% \textsc{Even} &=
% \set{\#w_1^R \#w_2\# w_3^R\# \cdots \#w_n\# \mid
% w_{i-1} \vdash w_i \text{ for all even } i}
%\end{align*}
%Observe that both \textsc{Odd} and \textsc{Even} are CFLs. Finally, we can also see that $L = \textsc{Odd} / \textsc{Even}$.
\newpage
\bigskip
\item
%\textsf{[LP,cxy]}
For any language $L \subseteq \set{0,1}^*$ and any integer $n$, let $L_n$ denote the subset of strings in $L$ of length at most $n$. We say that $L$ is \emph{self-reducible} if there exists a polynomial time oracle Turing machine $M$ such that for any string $x \in \set{0,1}^n$, $x \in L$ if and only if $M^{L_{n-1}}$ accepts $x$. (Here $M^A$ denotes the oracle Turing machine $M$ with oracle $A$.)
\begin{enumerate}
\item Give an example of a language that is self-reducible.
\item Prove that if $L$ is self-reducible, then $L$ is in PSPACE.
\end{enumerate}
\bigskip
\item %\textsf{[MP,prob]}
This problem is concerned with probability distributions over $\set{0,1}^n$, the set of $n$-bit strings. A probability distribution is a function $p:\set{0,1}^n\to[0,1]$ such that ${\sum_{x\in\set{0,1}^n} p(x) = 1}$.
A distribution $p$ is \emph{$\delta$-balanced at position $i$} if and only if
\[
-\delta
~\le~
\abs{p( y_1\ldots y_{i-1} 0 y_i\ldots y_{n-1}) - p( y_1\ldots y_{i-1} 1 y_i\ldots y_{n-1})}
~\le~
\delta,
\]
or equivalently,
\[
\frac{1-\delta}{1+\delta}
\le
\frac{p( y_1\ldots y_{i-1} 0 y_i\ldots y_{n-1})}
{p( y_1\ldots y_{i-1} 1 y_i\ldots y_{n-1})}
\le
\frac{1+\delta}{1-\delta},
\]
for every string $y\in\set{0,1}^{n-1}$. (You should verify that these two inequalities are equivalent.) A distribution is \emph{$\delta$-balanced} if it is $\delta$-balanced at every bit position.
Consider an arbitrary boolean function $f:\set{0,1}^n \to \set{0,1}$. Let $p^f_0$ (resp. $p^f_1$) denote the probability that $f(x)=0$ (resp. $f(x)=1$) when in the input string $x$ is randomly generated according to the distribution~$p$:
\[
p^f_0 = \sum_{x|f(x)=0} p(x), \qquad\qquad
p^f_1 = \sum_{x|f(x)=1} p(x).
\]
\textbf{Prove} that for every boolean function $f:\set{0,1}^n \to \set{0,1}$ and every $\delta\in[0,1]$, there is a $\delta$-balanced distribution $p$ over $\set{0,1}^n$ such that $|p^f_0-p^f_1| \ge \delta$.
\Hint{Consider separately the functions $f$ for which $|u^f_0-u^f_1| \ge \delta$ and those for which
$|u^f_0-u^f_1| < \delta$, where $u$ is the uniform distribution.}
\bigskip
\item
%\textsf{[MV,aut]}
For any strings $x,y \in \Sigma^*$, write $x \sqsubseteq y$ if $x$ is a (not necessarily contiguous) subsequence of $y$. For example, $\textsf{radar} \sqsubseteq \textsf{abracadabra}$. A subset $L \subseteq \Sigma^*$ is said to be \emph{downward closed} if $x \in L$ and $w\sqsubseteq x$ implies that $w \in L$. Prove that every downward closed set $L$ is regular.
\Hint{You may use the following observation due to Higman without proof: The set of $\sqsubseteq$-minimal elements of any set $L \subseteq \Sigma^*$ is finite.}
%\textbf{Solution:}
%Regular languages are closed under complementation. Hence if we show that $\bar{L}$ is regular whenever $L$ is downward closed, we would have proved the result. If $L$ is downward closed then $\bar{L}$ is upward closed. For a string $x = a_1a_2\cdots a_n$, let $\uparrow x = \set{y \mid x \sqsubseteq y} = \Sigma^*a_1\Sigma^*a_2\Sigma^*\cdots a_n\Sigma^*$; thus, $\uparrow x$
%is regular for any $x$. Let $\min(\bar{L})$ be the set of minimal elements in $\bar{L}$. Since $\bar{L}$ is upward closed, we have
%\[
% \bar{L} = \bigcup_{x \in \min(\bar{L})} \uparrow x
%\]
%Further by Higman's Lemma, we have $\min(\bar{L})$ is finite. Thus, $\bar{L}$ is a finite union of regular languages, which is regular.
\let\x\underline
\bigskip
\item %\textsf{[JE,alg/aut]}
Let $x = x_1x_2\dots x_n$ be an $n$-character string over some finite alphabet $\Sigma$, and let $A$ be a deterministic finite-state machine with $m$ states.
\begin{enumerate}
\item
Describe and analyze an algorithm to compute the longest (not necessarily contiguous) subsequence of $x$ that is accepted by $A$. (For example, $A$ accepts the language $(\mathsf{ar})^*$ and $x = \mathsf{\x{a}b\x{r}ac\x{a}dab\x{r}a}$, your algorithm should output \textsf{arar}.)
\item
Describe and analyze an algorithm to compute the longest (not necessarily contiguous) supersequence of $x$ that is accepted by $A$. (For example, if $A$ accepts the language $(\mathsf{abcdr})^*$ and $x = \mathsf{abracadabra}$, your algorithm should output \textsf{\x{ab}cd\x{ra}b\x{c}dr\x{a}bc\x{d}r\x{ab}cd\x{ra}bcdr}.)
\end{enumerate}
\end{enumerate}
\end{document}