From be2bf9fa958733350d471621fafb32739832fb87 Mon Sep 17 00:00:00 2001 From: ndwarshuis Date: Mon, 2 Aug 2021 18:45:33 -0400 Subject: [PATCH] ENH make the modeling section better --- tex/references.bib | 26 +++++++++++++++ tex/thesis.tex | 81 +++++++++++++++++++++++++++++----------------- 2 files changed, 77 insertions(+), 30 deletions(-) diff --git a/tex/references.bib b/tex/references.bib index f7fe8b9..adeb255 100644 --- a/tex/references.bib +++ b/tex/references.bib @@ -2425,6 +2425,32 @@ CONCLUSIONS: We developed a simplified, semi-closed system for the initial selec publisher = {Wiley}, } +@Article{Qiu2011, + author = {Peng Qiu and Erin F Simonds and Sean C Bendall and Kenneth D Gibbs and Robert V Bruggner and Michael D Linderman and Karen Sachs and Garry P Nolan and Sylvia K Plevritis}, + journal = {Nature Biotechnology}, + title = {Extracting a cellular hierarchy from high-dimensional cytometry data with {SPADE}}, + year = {2011}, + month = {oct}, + number = {10}, + pages = {886--891}, + volume = {29}, + doi = {10.1038/nbt.1991}, + publisher = {Springer Science and Business Media {LLC}}, +} + +@Article{Qiu2017, + author = {Peng Qiu}, + journal = {Cytometry Part A}, + title = {Toward deterministic and semiautomated {SPADE} analysis}, + year = {2017}, + month = {feb}, + number = {3}, + pages = {281--289}, + volume = {91}, + doi = {10.1002/cyto.a.23068}, + publisher = {Wiley}, +} + @Comment{jabref-meta: databaseType:bibtex;} @Comment{jabref-meta: grouping: diff --git a/tex/thesis.tex b/tex/thesis.tex index ff9ebf1..67b6fd3 100644 --- a/tex/thesis.tex +++ b/tex/thesis.tex @@ -123,6 +123,7 @@ \newacronym{colb}{COL-B}{collagenase B} \newacronym{cold}{COL-D}{collagenase D} \newacronym{tsne}{tSNE}{t-stochastic neighbor embedding} +\newacronym{umap}{UMAP}{uniform manifold approximation and projection} \newacronym{anv}{AXV}{Annexin-V} \newacronym{pi}{PI}{propidium iodide} \newacronym{rt}{RT}{room temperature} @@ -1103,48 +1104,68 @@ directions for future work. 
To this end, the types of \glspl{doe} we generally used in this work were fractional factorial designs with three levels, which enable the estimation of both main effects and second order quadratic effects. -\subsection*{identification and standardization of CPPs and CQAs}\label{sec:background_cqa} +\subsection*{identification and standardization of CPPs and + CQAs}\label{sec:background_cqa} -Ultimately the identification of relevant \glspl{cpp} and \glspl{cqa} is an -interative process +% BACKGROUND at least attempt to show that there is a lot of work in the space +% identifying signaling networks -A number of multiomics strategies exist which can generate rich datasets for T -cells. We will consider several multiomics strategies within this proposal: +In the context of T cell manufacturing, ideally we would have a set of +non-destructive biomarkers that could both identify functional T cells and +predict when a process is on track to deliver such functional T cells. T cells +secrete numerous cytokines and metabolites in the media, which may reflect the +internal state accurately and thus serve as a potential set of \glspl{cqa}. + +The complexity of these pathways dictates that we take a big-data approach to +this problem. To this end, there are several pertinent multi-omic (or simply +`omic') techniques that can be used to collect such datasets, which can then be +mined, modeled, and correlated to relevant responses (such as an endpoint +quantification of memory T cells) to identify pertinent \glspl{cqa}. + +An overview of the techniques used in this work follows: \begin{description} -\item[Luminex:] A multiplexed bead-based \gls{elisa} that can measure +\item[Luminex --] This is a multiplexed bead-based assay similar to \gls{elisa} that can measure many bulk (not single cell) cytokine concentrations simultaneously - in a media sample. Since this only requires media (as opposed to - destructively measuring cells) we will use this as a longitudinal - measurement. 
-\item[Metabolomics:] It is well known that T cells of different + in a media sample. This is a destructive assay but does not require cells to + obtain a measurement. +\item[\gls{nmr} --] It is well known that T cells of different lineages have different metabolic profiles; for instance memory T cells have larger aerobic capacity and fatty acid - oxidation\cite{Buck2016, van_der_Windt_2012}. We will interrogate - key metabolic species using \gls{nmr} in collaboration with the - Edison Lab at the University of Georgia. This will be both a - longitudinal assay using media samples (since some metabolites may - be expelled from cells that are indicative of their phenotype) and - at endpoint where we will lyse the cells and interogate their entire - metabolome. -\item[Flow and Mass Cytometry:] Flow cytometry using fluorophores has been used - extensively for immune cell analysis, but has a practical limit of + oxidation\cite{Buck2016, van_der_Windt_2012}. \gls{nmr} is a technique that + can non-destructively quantify small molecules in a media sample, and thus is + an attractive method that could be used for inline, real-time monitoring. +\item[Flow and Mass Cytometry --] Flow cytometry using fluorophores has been + used extensively for immune cell analysis, but has a practical limit of approximately 18 colors\cite{Spitzer2016}. Mass cytometry is analogous to traditional flow cytometry except that it uses heavy-metal \gls{mab} - conjugates, which has a practical limit of over 50 markers. This will be - useful in determining precise subpopulations and phenotypes that may be - influencing responses, especially when one considers that many cell types can - be defined by more than one marker combination. We will perform this at - endpoint. 
While mass cytometry is less practical than simple flow cytometers - such as the BD Accuri, we may find that only a few markers are required to - accurately predict performance, and thus this could easily translate to - industry using relatively cost-effective equipment. + conjugates, which has a practical limit of over 50 markers. While mass + cytometry is less practical than simple flow cytometers such as the BD Accuri, + we may find that only a few markers are required to accurately predict + performance, and thus this could easily translate to industry using relatively + cost-effective equipment. Both of these destructively analyze the cells + themselves, but they have the advantage in that they are measuring a direct + property of the cells and not a secreted product. \end{description} -% TODO add a computational section +% BACKGROUND what about ssRNAseq? -% TODO add a section explaining causal inference since this is a big part of -% the end of aim 1 +Upon collecting these omic datasets, determining the \glspl{cqa} becomes a +computational problem. Predictions of the final product using data collected +earlier in time can be made using any number of supervised learning techniques +(linear and non-linear regression in all its forms) which in turn can be used to +develop process control models. Unsupervised learning and dimensionality +reduction techniques such as \gls{tsne}, \gls{umap}, and +\gls{spade}\cite{Qiu2011, Qiu2017} can be performed to delineate clusters of +interesting cell types and the markers that define them. + +Ultimately, identifying \glspl{cqa} will likely be an iterative process, wherein +putative \glspl{cqa} will be identified, the corresponding \glspl{cpp} will be +set and optimized to maximize products with these \glspl{cqa}, and then +additional data will be collected in the clinic as the product is tested on +various patients with different indications. 
Additional \glspl{cqa} may be +identified which better predict specific clinical outcomes, which can be fed +back into the process model and optimized again. \section{Innovation}