diff --git a/tex/references.bib b/tex/references.bib
index f7fe8b9..adeb255 100644
--- a/tex/references.bib
+++ b/tex/references.bib
@@ -2425,6 +2425,32 @@ CONCLUSIONS: We developed a simplified, semi-closed system for the initial selec
   publisher = {Wiley},
 }
 
+@Article{Qiu2011,
+  author    = {Peng Qiu and Erin F Simonds and Sean C Bendall and Kenneth D Gibbs and Robert V Bruggner and Michael D Linderman and Karen Sachs and Garry P Nolan and Sylvia K Plevritis},
+  journal   = {Nature Biotechnology},
+  title     = {Extracting a cellular hierarchy from high-dimensional cytometry data with {SPADE}},
+  year      = {2011},
+  month     = {oct},
+  number    = {10},
+  pages     = {886--891},
+  volume    = {29},
+  doi       = {10.1038/nbt.1991},
+  publisher = {Springer Science and Business Media {LLC}},
+}
+
+@Article{Qiu2017,
+  author    = {Peng Qiu},
+  journal   = {Cytometry Part A},
+  title     = {Toward deterministic and semiautomated {SPADE} analysis},
+  year      = {2017},
+  month     = {feb},
+  number    = {3},
+  pages     = {281--289},
+  volume    = {91},
+  doi       = {10.1002/cyto.a.23068},
+  publisher = {Wiley},
+}
+
 @Comment{jabref-meta: databaseType:bibtex;}
 
 @Comment{jabref-meta: grouping:
diff --git a/tex/thesis.tex b/tex/thesis.tex
index ff9ebf1..67b6fd3 100644
--- a/tex/thesis.tex
+++ b/tex/thesis.tex
@@ -123,6 +123,7 @@
 \newacronym{colb}{COL-B}{collagenase B}
 \newacronym{cold}{COL-D}{collagenase D}
 \newacronym{tsne}{tSNE}{t-stochastic neighbor embedding}
+\newacronym{umap}{UMAP}{uniform manifold approximation and projection}
 \newacronym{anv}{AXV}{Annexin-V}
 \newacronym{pi}{PI}{propidium iodide}
 \newacronym{rt}{RT}{room temperature}
@@ -1103,48 +1104,68 @@ directions for future work. To this end, the types of \glspl{doe} we generally
 used in this work were fractional factorial designs with three levels, which
 enable the estimation of both main effects and second order quadratic effects.
 
-\subsection*{identification and standardization of CPPs and CQAs}\label{sec:background_cqa}
+\subsection*{identification and standardization of CPPs and
+  CQAs}\label{sec:background_cqa}
 
-Ultimately the identification of relevant \glspl{cpp} and \glspl{cqa} is an
-interative process
+% BACKGROUND at least attempt to show that there is a lot of work in the space
+% identifying signaling networks
 
-A number of multiomics strategies exist which can generate rich datasets for T
-cells. We will consider several multiomics strategies within this proposal:
+In the context of T cell manufacturing, ideally we would have a set of
+non-destructive biomarkers that could both identify functional T cells and
+predict when a process is on track to deliver such functional T cells. T cells
+secrete numerous cytokines and metabolites in the media, which may reflect the
+internal state accurately and thus serve as a potential set of \glspl{cqa}.
+
+The complexity of these pathways dictates that we take a big-data approach to
+this problem. To this end, there are several pertinent multi-omic (or simply
+`omic') techniques that can be used to collect such datasets, which can then be
+mined, modeled, and correlated to relevant responses (such as an endpoint
+quantification of memory T cells) to identify pertinent \glspl{cqa}.
+
+The techniques used in this work are summarized below:
 
 \begin{description}
-\item[Luminex:] A multiplexed bead-based \gls{elisa} that can measure
+\item[Luminex --] This is a multiplexed bead-based assay similar to \gls{elisa} that can measure
   many bulk (not single cell) cytokine concentrations simultaneously
-  in a media sample. Since this only requires media (as opposed to
-  destructively measuring cells) we will use this as a longitudinal
-  measurement.
-\item[Metabolomics:] It is well known that T cells of different
+  in a media sample. This is a destructive assay but does not require cells to
+  obtain a measurement.
+\item[\gls{nmr} --] It is well known that T cells of different
   lineages have different metabolic profiles; for instance memory T
   cells have larger aerobic capacity and fatty acid
-  oxidation\cite{Buck2016, van_der_Windt_2012}. We will interrogate
-  key metabolic species using \gls{nmr} in collaboration with the
-  Edison Lab at the University of Georgia. This will be both a
-  longitudinal assay using media samples (since some metabolites may
-  be expelled from cells that are indicative of their phenotype) and
-  at endpoint where we will lyse the cells and interogate their entire
-  metabolome.
-\item[Flow and Mass Cytometry:] Flow cytometry using fluorophores has been used
-  extensively for immune cell analysis, but has a practical limit of
+  oxidation\cite{Buck2016, van_der_Windt_2012}. \gls{nmr} is a technique that
+  can non-destructively quantify small molecules in a media sample, and thus is
+  an attractive method that could be used for inline, real-time monitoring.
+\item[Flow and Mass Cytometry --] Flow cytometry using fluorophores has been
+  used extensively for immune cell analysis, but has a practical limit of
   approximately 18 colors\cite{Spitzer2016}. Mass cytometry is analogous to
   traditional flow cytometry except that it uses heavy-metal \gls{mab}
-  conjugates, which has a practical limit of over 50 markers. This will be
-  useful in determining precise subpopulations and phenotypes that may be
-  influencing responses, especially when one considers that many cell types can
-  be defined by more than one marker combination. We will perform this at
-  endpoint. While mass cytometry is less practical than simple flow cytometers
-  such as the BD Accuri, we may find that only a few markers are required to
-  accurately predict performance, and thus this could easily translate to
-  industry using relatively cost-effective equipment.
+  conjugates, which has a practical limit of over 50 markers. While mass
+  cytometry is less practical than simple flow cytometers such as the BD Accuri,
+  we may find that only a few markers are required to accurately predict
+  performance, and thus this could easily translate to industry using relatively
+  cost-effective equipment. Both of these destructively analyze the cells
+  themselves, but they have the advantage that they measure a direct
+  property of the cells rather than a secreted product.
 \end{description}
 
-% TODO add a computational section
+% BACKGROUND what about ssRNAseq?
 
-% TODO add a section explaining causal inference since this is a big part of
-% the end of aim 1
+Upon collecting these omic datasets, determining the \glspl{cqa} becomes a
+computational problem. Predictions of the final product using data collected
+earlier in time can be made using any number of supervised learning techniques
+(linear and non-linear regression in all its forms) which in turn can be used to
+develop process control models. Unsupervised learning and dimensionality
+reduction techniques such as \gls{tsne}, \gls{umap}, and
+\gls{spade}\cite{Qiu2011, Qiu2017} can be performed to delineate clusters of
+interesting cell types and the markers that define them.
+
+Ultimately, identifying \glspl{cqa} will likely be an iterative process, wherein
+putative \glspl{cqa} will be identified, the corresponding \glspl{cpp} will be
+set and optimized to maximize products with these \glspl{cqa}, and then
+additional data will be collected in the clinic as the product is tested on
+various patients with different indications. Additional \glspl{cqa} may be
+identified which better predict specific clinical outcomes, which can be fed
+back into the process model and optimized again.
 
 \section{Innovation}