ENH proofread aim2a

2021-08-04 18:24:06 -04:00 · 2021-08-04 18:24:06 -04:00 · 4789723e72
parent fa716f9d78
commit 4789723e72
9 changed files with 372 additions and 268 deletions
--- a/tables/doe_cd4.tex
+++ b/tables/doe_cd4.tex
@ -16,8 +16,8 @@
 Observations & 30 \\ 
 R$^{2}$ & 0.888 \\ 
 Adjusted R$^{2}$ & 0.870 \\ 
-Residual Std. Error & 727,042.800 (df = 25) \\ 
+% Residual Std. Error & 727,042.800 (df = 25) \\ 
-F Statistic & 49.454$^{***}$ (df = 4; 25) \\ 
+% F Statistic & 49.454$^{***}$ (df = 4; 25) \\ 
 \hline 
 \hline \\[-1.8ex] 
 \textit{Note:}  & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
--- a/tables/doe_mem1.tex
+++ b/tables/doe_mem1.tex
@ -16,8 +16,8 @@
 Observations & 30 \\ 
 R$^{2}$ & 0.331 \\ 
 Adjusted R$^{2}$ & 0.224 \\ 
-Residual Std. Error & 3,659,501.000 (df = 25) \\ 
+% Residual Std. Error & 3,659,501.000 (df = 25) \\ 
-F Statistic & 3.096$^{**}$ (df = 4; 25) \\ 
+% F Statistic & 3.096$^{**}$ (df = 4; 25) \\ 
 \hline 
 \hline \\[-1.8ex] 
 \textit{Note:}  & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
--- a/tables/doe_mem2.tex
+++ b/tables/doe_mem2.tex
@ -23,8 +23,8 @@
 Observations & 30 \\ 
 R$^{2}$ & 0.741 \\ 
 Adjusted R$^{2}$ & 0.583 \\ 
-Residual Std. Error & 0.228 (df = 18) \\ 
+% Residual Std. Error & 0.228 (df = 18) \\ 
-F Statistic & 4.693$^{***}$ (df = 11; 18) \\ 
+% F Statistic & 4.693$^{***}$ (df = 11; 18) \\ 
 \hline 
 \hline \\[-1.8ex] 
 \textit{Note:}  & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
--- a/tables/doe_mem4.tex
+++ b/tables/doe_mem4.tex
@ -14,8 +14,8 @@
 Observations & 30 \\ 
 R$^{2}$ & 0.835 \\ 
 Adjusted R$^{2}$ & 0.808 \\ 
-Residual Std. Error & 493,168.700 (df = 25) \\ 
+% Residual Std. Error & 493,168.700 (df = 25) \\ 
-F Statistic & 31.571$^{***}$ (df = 4; 25) \\ 
+% F Statistic & 31.571$^{***}$ (df = 4; 25) \\ 
 \hline 
 \hline \\[-1.8ex] 
 \textit{Note:}  & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
--- a/tables/doe_ratio.tex
+++ b/tables/doe_ratio.tex
@ -15,8 +15,8 @@
 Observations & 30 \\ 
 R$^{2}$ & 0.879 \\ 
 Adjusted R$^{2}$ & 0.860 \\ 
-Residual Std. Error & 0.039 (df = 25) \\ 
+% Residual Std. Error & 0.039 (df = 25) \\ 
-F Statistic & 45.554$^{***}$ (df = 4; 25) \\ 
+% F Statistic & 45.554$^{***}$ (df = 4; 25) \\ 
 \hline 
 \hline \\[-1.8ex] 
 \textit{Note:}  & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ 
--- a/tables/doe_runs.tex
+++ b/tables/doe_runs.tex
@ -1,7 +1,7 @@
 \begin{tabular}{ccccc}
 \\[-1.8ex]\hline 
 \hline \\[-1.8ex] 
-\\[-1.8ex] Dataset & Run & IL2 Conc (\si{\IU\per\ml}) & DMS Conc. (\si{\IU\per\ml})
+\\[-1.8ex] Dataset & Run & IL2 Conc (\si{\IU\per\ml}) & DMS Conc. (\si{\dms\per\ml})
  & Functional \gls{mab} (\si{\percent}) \\
 \hline \\[-1.8ex]
 DOE & 1 & 30 & 500 & 100\\
--- a/tables/model_results.tex
+++ b/tables/model_results.tex
@ -17,7 +17,7 @@ PP+S6 & \SI{98}{\percent} & \SI{71.4}{\percent} & \SI{99.9}{\percent} & \SI{75.0
 PP+S6+N6 & \SI{98}{\percent} & \SI{68.2}{\percent} & \SI{95.6}{\percent} & \SI{74.4}{\percent} & \SI{72.5}{\percent} & \SI{81.7}{\percent} & \SI{77.0}{\percent}\\
 \hline \\
-\multicolumn{8}{l}{\ptmemh{} cells} \\ 
+\multicolumn{8}{l}{\ptmemk{} cells} \\ 
 PP+N4 & \SI{93}{\percent} & \SI{4.7}{\percent} & \SI{44.4}{\percent} & \SI{9.2}{\percent} & \SI{1.2}{\percent} & \SI{65.1}{\percent} & \SI{9.1}{\percent}\\
 PP+N6 & \SI{86}{\percent} & \SI{2.0}{\percent} & \SI{29.9}{\percent} & \SI{15.8}{\percent} & \SI{28.5}{\percent} & \SI{63.3}{\percent} & \SI{30.6}{\percent}\\
 PP+S6 & \SI{93}{\percent} & \SI{7.8}{\percent} & \SI{28.0}{\percent} & \SI{15.1}{\percent} & \SI{76.2}{\percent} & \SI{98.4}{\percent} & \SI{49.8}{\percent}\\
--- a/tex/references.bib
+++ b/tex/references.bib
@ -2673,6 +2673,62 @@ CONCLUSIONS: We developed a simplified, semi-closed system for the initial selec
  publisher = {The American Association of Immunologists},
 }
@Article{Delaglio1995,
  author    = {Frank Delaglio and Stephan Grzesiek and GeertenW. Vuister and Guang Zhu and John Pfeifer and Ad Bax},
  journal   = {Journal of Biomolecular {NMR}},
  title     = {{NMRPipe}: A multidimensional spectral processing system based on {UNIX} pipes},
  year      = {1995},
  month     = {nov},
  number    = {3},
  volume    = {6},
  doi       = {10.1007/bf00197809},
  publisher = {Springer Science and Business Media {LLC}},
 }
@Article{Dieterle2006,
  author    = {Frank Dieterle and Alfred Ross and Götz Schlotterbeck and Hans Senn},
  journal   = {Analytical Chemistry},
  title     = {Probabilistic Quotient Normalization as Robust Method to Account for Dilution of Complex Biological Mixtures. Application in1H {NMR} Metabonomics},
  year      = {2006},
  month     = {jul},
  number    = {13},
  pages     = {4281--4290},
  volume    = {78},
  doi       = {10.1021/ac051632c},
  publisher = {American Chemical Society ({ACS})},
 }
@Article{Dashti2017,
  author    = {Hesam Dashti and William M. Westler and Marco Tonelli and Jonathan R. Wedell and John L. Markley and Hamid R. Eghbalnia},
  journal   = {Analytical Chemistry},
  title     = {Spin System Modeling of Nuclear Magnetic Resonance Spectra for Applications in Metabolomics and Small Molecule Screening},
  year      = {2017},
  month     = {nov},
  number    = {22},
  pages     = {12201--12208},
  volume    = {89},
  doi       = {10.1021/acs.analchem.7b02884},
  publisher = {American Chemical Society ({ACS})},
 }
@InProceedings{Kordona,
  author    = {A.K. Kordon and Ching-Tai Lue},
  booktitle = {Proceedings of the 2004 Congress on Evolutionary Computation ({IEEE} Cat. No.04TH8753)},
  title     = {Symbolic regression modeling of blown film process effects},
  year      = {2004},
  publisher = {{IEEE}},
  doi       = {10.1109/cec.2004.1330907},
 }
@Book{Koza1992,
  author    = {Koza, John},
  publisher = {MIT Press},
  title     = {Genetic programming : on the programming of computers by means of natural selection},
  year      = {1992},
  address   = {Cambridge, Mass},
  isbn      = {0262111705},
 }
@Comment{jabref-meta: databaseType:bibtex;}
@Comment{jabref-meta: grouping:
--- a/tex/thesis.tex
+++ b/tex/thesis.tex
@ -103,8 +103,9 @@
 \tcellacronym{tc}{c}{cytotoxic}{}
 \tcellacronym{th1}{h}{type 1 helper}{1}
 \tcellacronym{th2}{h}{type 2 helper}{2}
-% \tcellacronym{th17}{h}{\il{17} helper}{1}
+\tcellacronym{th17}{h}{IL-17 helper}{17}
 \newacronym{bcaa}{BCAA}{branched-chain amino acid}
 \newacronym{til}{TIL}{tumor infiltrating lymphocyte}
 \newacronym{tcr}{TCR}{T cell receptor}
 \newacronym{act}{ACT}{adoptive cell therapies}
@ -207,6 +208,7 @@
 \newacronym{scfv}{scFv}{single-chain fragment variable}
 \newacronym{hepes}{HEPES}{4-(2-hydroxyethyl)-1-piperazineethanesulfonic acid}
 \newacronym{nhs}{NHS}{N-hydroxysulfosuccinimide}
 \newacronym{tocsy}{TOCSY}{total correlation spectroscopy}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % SI units for uber nerds
@ -282,10 +284,13 @@
 % so I don't need to worry about abbreviating all the different interleukins
 \newcommand{\il}[1]{\gls{il}-#1}
-% DOE responses I don't feel like typing ad-nauseam
+% DOE stuff I don't feel like typing ad-nauseam
 \newcommand{\pilII}{\gls{il2} concentration}
 \newcommand{\pdms}{\gls{dms} concentration}
 \newcommand{\pmab}{functional \gls{mab} surface density}
 \newcommand{\rmemh}{total \ptmemh{} cells}
 \newcommand{\rmemk}{total \ptmemk{} cells}
 \newcommand{\rratio}{CD4/CD8 ratio}
 % vendor and product stuff I don't feel like typing
 \newcommand{\catnum}[2]{(#1, #2)}
@ -2683,12 +2688,12 @@ necessary to have a fully-automated manufacturing system.
 The \gls{dms} system could be used as a drop in replacement for beads in many of
 current allogeneic therapies. Indeed, given its higher potential for expansion
-(\cref{fig:dms_exp,tab:ci_controlled}, it may work in cases where the beads fail
+(\cref{fig:dms_exp,tab:ci_controlled}), it may work in cases where the beads
-(although this would need to be tested by gathering data with many unhealthy
+fail (although this would need to be tested by gathering data with many
-donors). However, in the autologous setting patients only need a fixed dose, and
+unhealthy donors). However, in the autologous setting patients only need a fixed
-thus any expansion beyond the indicated dose would be wasted. Given this, it
+dose, and thus any expansion beyond the indicated dose would be wasted. Given
-will be interesting to apply this technology in an allogeneic paradigm where
+this, it will be interesting to apply this technology in an allogeneic paradigm
-this increased expansion potential would be well utilized.
+where this increased expansion potential would be well utilized.
 Finally, we should note that while we demonstrated a method providing superior
 performance compared to bead-based expansion, the cell manufacturing field would
@ -2702,20 +2707,20 @@ manufacturing companies.
 \section{introduction}
-The purpose of this sub-aim was to develop computational methods to identify novel
+The purpose of this sub-aim was to develop computational methods to identify
-\glspl{cqa} and \glspl{cpp} that could be used for release criteria, process
+novel \glspl{cqa} and \glspl{cpp} that could be used for release criteria,
-control, and process optimization for the \gls{dms} platform. We hypothesized
+process control, and process optimization for the \gls{dms} platform. We
-that T cells grown using the \gls{dms} system would produce detectable
+hypothesized that T cells grown using the \gls{dms} system would produce
-biological signatures in the media supernatent which corresponded to clinically
+detectable biological signatures in the media supernatent which corresponded to
-relevent responses such as the fold expansion of the T cells or the resulting
+clinically relevent responses such as fold expansion or phenotype. We tested
-phenotype. We tested this hypothesis by activating T cells under a variety of
+this hypothesis by activating T cells under a variety of conditions using a
-conditions using a \gls{doe}, sampling the media at intermediate timepoints, and
+\gls{doe}, sampling the media at intermediate timepoints, and creating models to
-creating models to predict the outcome of the cultures. We should stress that
+predict the outcome of the cultures. We should stress that the specific
-the specific \glspl{cpp} and \glspl{cqa} determined by this aim are not
+\glspl{cpp} and \glspl{cqa} determined by this aim are not necessarily
-necessarily universal, as this was not performed with equipment that would
+universal, as this was not performed with equipment that would normally be used
-normally be used at scale. However, the process outlined here is one that can
+at scale. However, the process outlined here is one that can easily be adaptable
-easily be adaptable to any system, and the specific findings themselves offer
+to any system, and the specific findings themselves offer interesting insights
-interesting insights that warrant further study.
+that warrant further study.
 \section{methods}
@ -2744,33 +2749,38 @@ interesting insights that warrant further study.
  \label{fig:mod_overview}
 \end{figure*}
-The first DOE resulted in a randomized 18-run I-optimal custom design where each
+The overall workflow of this aim is shown in \cref{fig:mod_overview_flow}.
-DMS parameter was evaluated at three levels: IL2 concentration (10, 20, and 30
+Experimental conditions within the design space were explored using a \gls{doe},
-U/uL), DMS concentration (500, 1500, 2500 carrier/uL), and functionalized
+and longitudinal samples were collected for each condition as the cultures
-antibody percent (60\%, 80\%, 100\%). These 18 runs consisted of 14 unique
+progressed. Data from inputs and/or longitudinal samples were used to predict
-parameter combinations where 4 of them were replicated twice to assess
+the endpoint response. The fusion of cytokine and \gls{nmr} profiles from media
-prediction error. Process parameters for the ADOE were evaluated at multiple
+to model these responses included 30 cytokines from a custom Thermo Fisher
-levels: IL2 concentration (30, 35, and 40 U/uL), DMS concentration (500, 1000,
+ProcartaPlex Luminex kit and 20 \gls{nmr} features. These 20 spectral features
-1500, 2000, 2500, 3000, 3500 carrier/uL), and functionalized antibody percent
+from \gls{nmr} media analysis were selected out of approximately 250 peaks
-(100\%) as depicted in Fig.1b. To further optimize the initial region explored
+through the implementation of a variance-based feature selection approach and
-(DOE) in terms of total live CD4+ TN+TCM cells, a sequential adaptive
+some manual inspection steps.
-design-of-experiment (ADOE) was designed with 10 unique parameter combinations,
+
-two of these replicated twice for a total of 12 additional samples (Fig.1b). The
+The first \gls{doe} resulted in a randomized 18-run I-optimal custom design
-fusion of cytokine and NMR profiles from media to model these responses included
+where each \gls{dms} parameter was evaluated at three levels: \pilII{} (10, 20,
-30 cytokines from a custom Thermo Fisher ProcartaPlex Luminex kit and 20 NMR
+and 30 U/uL), \pdms{} (500, 1500, 2500 \si{\dms\per\ul}), and \pmab{} (60, 80,
-features. These 20 spectral features from NMR media analysis were selected out
+100 \si{\percent}). These 18 runs consisted of 14 unique parameter combinations
-of approximately 250 peaks through the implementation of a variance-based
+where 4 of them were replicated twice to assess prediction error. To further
-feature selection approach and some manual inspection steps.
+optimize the initial region explored, an \gls{adoe} was designed with 10 unique
 parameter combinations, two of these replicated twice for a total of 12
 additional samples (\cref{fig:mod_overview_doe}). Process parameters for the
 \gls{adoe} were evaluated at multiple levels: \pilII{} (30, 35, and 40
 \si{\IU\per\ml}), \pdms{} (500, 1000, 1500, 2000, 2500, 3000, 3500
 \si{\dms\per\ml}), and \pmab{} (\SI{100}{\percent}) (\cref{fig:mod_overview}).
 \subsection{DMS fabrication}
 \glspl{dms} were fabricated as described in \cref{sec:dms_fab} with the
 following modifications in order to obtain a variable functional \gls{mab}
-surface density. During the \gls{mab} coating step, the anti-CD3/anti-CD28 mAb
+surface density. During the \gls{mab} coating step, the \acd{3}/\acd{28}
-mixture was further combined with a biotinylated isotype control to reduce the
+\gls{mab} mixture was further combined with a biotinylated isotype control to
-overall fraction of targeted \glspl{mab} (for example the \SI{60}{\percent}
+reduce the overall fraction of targeted \glspl{mab} (for example the
-\gls{mab} surface density corresponded to 3 mass parts \acd{3}, 3 mass parts
+\SI{60}{\percent} \gls{mab} surface density corresponded to 3 mass parts
-\acd{28}, and 4 mass parts isotype control).
+\acd{3}, 3 mass parts \acd{28}, and 4 mass parts isotype control).
 \subsection{T cell culture}
@ -2779,9 +2789,9 @@ following modifications. At days 4, 6, 8, and 11, \SI{100}{\ul} media were
 collected for the Luminex assay and \gls{nmr} analysis. The volume of removed
 media was equivalently replaced during the media feeding step, which took place
 immediately after sample collection. Additionally, the same media feeding
-schedule was followed for the DOE and ADOE to improve consistency, and the same
+schedule was followed for the \gls{doe} and \gls{adoe} to improve consistency,
-donor lot was used for both experiments. All cell counts were performed using
+and the same donor lot was used for both experiments. All cell counts were
-\gls{aopi}.
+performed using \gls{aopi}.
 \subsection{flow cytometry}
@ -2796,26 +2806,29 @@ Cytokines were quantified via Luminex as described in
 Prior to analysis, samples were centrifuged at \SI{2990}{\gforce} for
 \SI{20}{\minute} at \SI{4}{\degreeCelsius} to clear any debris\footnote{all
-  \gls{nmr} analysis was done by our collaborators at the University of
+  \gls{nmr} analysis was done by our collaborators Max Colonna and Art Edison at
-  Georgia}. 5 uL of 100/3 mM DSS-D6 in deuterium oxide (Cambridge Isotope
+  the University of Georgia; methods included here for reference}. \SI{5}{\ul} of
-Laboratories) were added to 1.7 mm NMR tubes (Bruker BioSpin), followed by 45 uL
+100/3 \si{\mM} DSS-D6 in deuterium oxide (Cambridge Isotope Laboratories) were
-of media from each sample that was added and mixed, for a final volume of 50 uL
+added to \SI{1.7}{\mm} \gls{nmr} tubes (Bruker BioSpin), followed by
-in each tube. Samples were prepared on ice and in predetermined, randomized
+\SI{45}{\ul} of media from each sample that was added and mixed, for a final
-order. The remaining volume from each sample in the rack (approx. 4 uL) was
+volume of \SI{50}{\ul} in each tube. Samples were prepared on ice and in
-combined to create an internal pool. This material was used for internal
+predetermined, randomized order. The remaining volume from each sample in the
-controls within each rack as well as metabolite annotation.
+rack (approx. \SI{4}{\ul}) was combined to create an internal pool. This
 material was used for internal controls within each rack as well as metabolite
 annotation.
-\gls{nmr} spectra were collected on a Bruker Avance III HD spectrometer at 600
+\gls{nmr} spectra were collected on a Bruker Avance III HD spectrometer at
-MHz using a 5-mm TXI cryogenic probe and TopSpin software (Bruker BioSpin).
+\SI{600}{\MHz} using a \SI{5}{\mm} TXI cryogenic probe and TopSpin software
-One-dimensional spectra were collected on all samples using the noesypr1d pulse
+(Bruker BioSpin). One-dimensional spectra were collected on all samples using
-sequence under automation using ICON NMR software. Two-dimensional \gls{hsqc}
+the noesypr1d pulse sequence under automation using ICON NMR software.
-and TOCSY spectra were collected on internal pooled control samples for
+Two-dimensional \gls{hsqc} and \gls{tocsy} spectra were collected on internal
-metabolite annotation.
+pooled control samples for metabolite annotation.
 One-dimensional spectra were manually phased and baseline corrected in TopSpin.
-Two-dimensional spectra were processed in NMRpipe37. One dimensional spectra
+Two-dimensional spectra were processed in NMRpipe\cite{Delaglio1995}. One
-were referenced, water/end regions removed, and normalized with the PQN
+dimensional spectra were referenced, water/end regions removed, and normalized
-algorithm38 using an in-house MATLAB (The MathWorks, Inc.) toolbox.
+with the PQN algorithm\cite{Dieterle2006} using an in-house MATLAB (The
 MathWorks, Inc.) toolbox.
 % TODO add the supplemental figure alluded to here?
 To reduce the total number of spectral features from approximately 250 peaks and
@ -2824,19 +2837,22 @@ variance-based feature selection was performed within MATLAB. For each digitized
 point on the spectrum, the variance was calculated across all experimental
 samples and plotted. Clearly-resolved features corresponding to peaks in the
 variance spectrum were manually binned and integrated to obtain quantitative
-feature intensities across all samples (Supp.Fig.S24). In addition to highly
+feature intensities across all samples.
-variable features, several other clearly resolved and easily identifiable
+% (Supp.Fig.S24).
-features were selected (glucose, BCAA region, etc). Some features were later
+In addition to highly variable features, several other clearly resolved and
-discovered to belong to the same metabolite but were included in further
+easily identifiable features were selected (glucose, \gls{bcaa} region, etc).
-analysis.
+Some features were later discovered to belong to the same metabolite but were
 included in further analysis.
 % I think this is the right source? it seems wrong in the manuscript but this
 % source at least talks about an optimization score
 Two-dimensional spectra collected on pooled samples were uploaded to COLMARm web
-server10, where \gls{hsqc} peaks were automatically matched to database peaks.
+server, where \gls{hsqc} peaks were automatically matched to database peaks.
 \gls{hsqc} matches were manually reviewed with additional 2D and proton spectra
 to confirm the match. Annotations were assigned a confidence score based upon
-the levels of spectral data supporting the match as previously described11.
+the levels of spectral data supporting the match as previously
-Annotated metabolites were matched to previously selected features used for
+described\cite{Dashti2017}. Annotated metabolites were matched to previously
-statistical analysis.
+selected features used for statistical analysis.
 % I'm pretty sure this isn't relevant
 % Using the list of annotated metabolites obtained above, an approximation of a
@ -2851,8 +2867,8 @@ statistical analysis.
 Several low abundance features selected for analysis did not have database
 matches and were not annotated. Statistical total correlation spectroscopy41
 suggested that some of these unknown features belonged to the same molecules
-(not shown). Additional multidimensional NMR experiments will be required to
+(not shown). Additional multidimensional \gls{nmr} experiments will be required
-determine their identity.
+to determine their identity.
 \subsection{machine learning and statistical analysis}
@ -2861,66 +2877,70 @@ Linear regression analysis of the \glspl{doe} was performed as described in
 Seven \gls{ml} techniques were implemented to predict three responses related to
 the memory phenotype of the cultured T cells under different process parameters
-conditions (i.e. Total Live CD4+ TN and TCM, Total Live CD8+ TN+TCM, and Ratio
+conditions (i.e. \rmemh{}, \rmemk{}, and \rratio{}). The \gls{ml} methods
-CD4+/CD8+ TN+TCM). The \gls{ml} methods executed were \gls{rf}, \gls{gbm},
+executed were \gls{rf}, \gls{gbm}, \gls{cif}, \gls{lasso}, \gls{plsr},
-\gls{cif}, \gls{lasso}, \gls{plsr}, \gls{svm}, and DataModeler’s
+\gls{svm}, and DataModeler’s \gls{sr}\footnote{\gls{sr} was performed by Theresa
-\gls{sr}\footnote{of these seven methods, all except \gls{lasso} were performed
+  Kotanchek at Evolved Analytics, \gls{rf}, \gls{gbm}, \gls{cif}, \gls{plsr},
-  by collaborators}. Primarily, \gls{sr} models were used to optimize process
+  \gls{svm} were performed by Valerie Odeh-Couvertier at UPRM. Methods included
-parameter values based on TN+TCM phenotype and to extract early predictive
+  here for reference}. Primarily, \gls{sr} models were used to optimize process
 parameter values based on \ptmem{} phenotype and to extract early predictive
 variable combinations from the multi-omics experiments. Furthermore, all
 regression methods were executed, and the high-performing models were used to
 perform a consensus analysis of the important variables to extract potential
-critical quality attributes and critical process parameters predictive of T-cell
+critical quality attributes and critical process parameters predictive of T cell
 potency, safety, and consistency at the early stages of the manufacturing
 process.
 \gls{sr} was done using Evolved Analytics’ DataModeler software (Evolved
 Analytics LLC, Midland, MI). DataModeler utilizes genetic programming to evolve
 symbolic regression models (both linear and non-linear) rewarding simplicity and
-accuracy. Using the selection criteria of highest accuracy (R2>90\% or
+accuracy. Using the selection criteria of highest accuracy
-noise-power) and lowest complexity, the top-performing models were identified.
+($R^2$>\SI{90}{\percent}) and lowest complexity, the top-performing models were
-Driving variables, variable combinations, and model dimensionality tables were
+identified. Driving variables, variable combinations, and model dimensionality
-generated. The top-performing variable combinations were used to generate model
+tables were generated. The top-performing variable combinations were used to
-ensembles. In this analysis, DataModeler’s SymbolicRegression function was used
+generate model ensembles. In this analysis, DataModeler’s
-to develop explicit algebraic (linear and nonlinear) models. The fittest models
+\inlinecode{SymbolicRegression} function was used to develop explicit algebraic
-were analyzed to identify the dominant variables using the VariablePresence
+(linear and nonlinear) models. The fittest models were analyzed to identify the
-function, the dominant variable combinations using the VariableCombinations
+dominant variables using the \inlinecode{VariablePresence} function, the
 dominant variable combinations using the \inlinecode{VariableCombinations}
 function, and the model dimensionality (number of unique variables) using the
-ModelDimensionality function. CreateModelEnsemble was used to define trustable
+\inlinecode{ModelDimensionality} function. \inlinecode{CreateModelEnsemble} was
-model ensembles using selected variable combinations and these were summarized
+used to define trustable model ensembles using selected variable combinations
-(model expressions, model phenotype, model tree plot, ensemble quality, model
+and these were summarized (model expressions, model phenotype, model tree plot,
-quality, variable presence map, \gls{anova} tables, model prediction plot, exportable
+ensemble quality, model quality, variable presence map, \gls{anova} tables,
-model forms) using the ModelSummaryTable function. Ensemble prediction and
+model prediction plot, exportable model forms) using the
-residual performance were respectively assessed via the EnsemblePredictionPlot
+\inlinecode{ModelSummaryTable} function. Ensemble prediction and residual
-and EnsembleResidualPlot subroutines. Model maxima (ModelMaximum function) and
+performance were assessed via the \inlinecode{EnsemblePredictionPlot} and
-model minima (ModelMinimum function) were calculated and displayed using the
+\inlinecode{EnsembleResidualPlot} subroutines respectively. Model maxima
-ResponsePlotExplorer function. Trade-off performance of multiple responses was
+(\inlinecode{ModelMaximum} function) and model minima (\inlinecode{ModelMinimum}
-explored using the MultiTargetResponseExplorer and ResponseComparisonExplorer
+function) were calculated and displayed using the
-with additional insights derived from the ResponseContourPlotExplorer. Graphics
+\inlinecode{ResponsePlotExplorer} function. Trade-off between multiple
-and tables were generated by DataModeler. These model ensembles were used to
+responses was explored using \inlinecode{MultiTargetResponseExplorer} and
-identify predicted response values, potential optima in the responses, and
+\inlinecode{ResponseComparisonExplorer} with additional insights derived from
-regions of parameter values where the predictions diverge the most.
+\inlinecode{ResponseContourPlotExplorer}. Graphics and tables were generated by
 DataModeler. These model ensembles were used to identify predicted response
 values, potential optima in the responses, and regions of parameter values where
 the predictions diverge the most.
 Non-parametric tree-based ensembles were done through the
 \inlinecode{randomForest}, inlinecode{gbm}, and \inlinecode{cforest} regression
 functions in R, for \gls{rf}, \gls{gbm}, and \gls{cif} models, respectively.
 Both \gls{rf} and \gls{cif} construct multiple decision trees in parallel, by
 randomly choosing a subset of features at each decision tree split, in the
-training stage. Random forest individual decision trees are split using the Gini
+training stage. \gls{rf} individual decision trees are split using the Gini
 Index, while conditional inference forest uses a statistical significance test
 procedure to select the variables at each split, reducing correlation bias. In
 contrast, \gls{gbm} construct regression trees in series through an iterative
 procedure that adapts over the training set. This model learns from the mistakes
-of previous regression trees in an iterative fashion to correct errors from its
+of previous regression trees in an iterative fashion to correct errors
-precursors’ trees (i.e. minimize \gls{mse}). Prediction performance was
+(\gls{mse}) from its precursors’ trees. Prediction performance was evaluated
-evaluated using \gls{loocv} and permutation-based
+using \gls{loocv} and permutation-based variable importance scores assessing
-variable importance scores assessing \% increase of \gls{mse}, relative
+percent increase of \gls{mse}, relative influence based on the increase of
-influence based on the increase of prediction error, coefficient values for
+prediction error, coefficient values for \gls{rf}, \gls{gbm}, and \gls{cif},
-\gls{rf}, \gls{gbm}, and \gls{cif}, respectively. \gls{plsr} was executed using
+respectively. \gls{plsr} was executed using the \inlinecode{plsr} function from
-the \inlinecode{plsr} function from the \inlinecode{pls} package in R while
+the \inlinecode{pls} package in R while \gls{lasso} regression was performed
-\gls{lasso} regression was performed using the \inlinecode{cv.glmnet} R package,
+using the \inlinecode{cv.glmnet} R package, both using \gls{loocv}. Finally, the
-both using leave-one-out cross-validation. Finally, the \inlinecode{kernlab} R
+\inlinecode{kernlab} R package was used to construct the \gls{svm} models.
 package was used to construct the \gls{svm} models.
 Parameter tuning was done for all models in a grid search manner using the train
 function from the \inlinecode{caret} R package using \gls{loocv} as the
@ -2936,12 +2956,12 @@ optimum \gls{loocv} performance as well. For \gls{plsr}, the optimal number of
 components to be used in the model was assessed based on the standard error of
 the cross-validation residuals using the function \inlinecode{selectNcomp} from
 the \inlinecode{pls} package. Moreover, \gls{lasso} regression was performed
-using the \inlinecode{cv.glmnet} package with alpha = 1. The best lambda for
+using the \inlinecode{cv.glmnet} package with $\upalpha$ = 1. The best
-each response was chosen using the minimum error criteria. Lastly, a fixed
+$\uplambda$ for each response was chosen using the minimum error criteria.
-linear kernel (i.e. \inlinecode{svmLinear}) was used to build the \gls{svm}
+Lastly, a fixed linear kernel (\inlinecode{svmLinear}) was used to build
-regression models evaluating the cost parameter value with best \gls{loocv}.
+the \gls{svm} regression models evaluating the cost parameter value with best
-Prediction performance was measured for all models using the final model with
+\gls{loocv}. Prediction performance was measured for all models using the final
-\gls{loocv} tuned parameters.
+model with \gls{loocv} tuned parameters.
 % TODO do I need this?
 % Table M2 shows the parameter values evaluated per model
@ -2961,16 +2981,17 @@ values, absolute coefficient values, weighted sum of absolute coefficients
 values, and relative importance from sensitivity analysis determined for
 \gls{rf}, \gls{gbm}, \gls{cif}, \gls{lasso}, \gls{plsr}, and \gls{svm},
 respectively. Using these scores, key predictive variables were selected if
-their importance scores were within the 80th percentile ranking for the
+their importance scores were within the \nth{80} percentile ranking for the
 following \gls{ml} methods: \gls{rf}, \gls{gbm}, \gls{cif}, \gls{lasso},
-\gls{plsr}, \gls{svm} while for \gls{sr} variables present in >30\% of the
+\gls{plsr}, \gls{svm} while for \gls{sr} variables present in >\SI{30}{\percent}
-top-performing \gls{sr} models from DataModeler (R2>= 90\%, Complexity >= 100)
+of the top-performing \gls{sr} models from DataModeler ($R^2\ge$
-were chosen to investigate consensus except for \gls{nmr} media models at day 4
+\SI{90}{\percent}, Complexity $\ge$ 100) were chosen to investigate consensus
-which considered a combination of the top-performing results of models excluding
+except for \gls{nmr} media models at day 4 which considered a combination of the
-lactate ppms, and included those variables which were in > 40\% of the best
+top-performing results of models excluding lactate ppms, and included those
-performing models. Only variables with those high percentile scoring values were
+variables which were in >\SI{40}{\percent} of the best performing models. Only
-evaluated in terms of their logical relation (intersection across \gls{ml}
+variables with those high percentile scoring values were evaluated in terms of
-models) and depicted using a Venn diagram from the \inlinecode{venn} R package.
+their logical relation (intersection across \gls{ml} models) and depicted using
 a Venn diagram from the \inlinecode{venn} R package.
 \section{results}
@ -3033,8 +3054,6 @@ advantage at lower \gls{il2} concentrations compared to beads. For this reason,
 we decided to investigate the lower range of \gls{il2} concentrations starting
 at \SI{10}{\IU\per\ml} throughout the remainder of this aim.
 % RESULT this is not consistent with the next section since the responses are
 % different
 \subsection{DOE shows optimal conditions for expanded potent T cells}
 % TABLE not all of these were actually used, explain why by either adding columns
@ -3045,7 +3064,6 @@ at \SI{10}{\IU\per\ml} throughout the remainder of this aim.
  \input{../tables/doe_runs.tex}
 \end{table}
 % RESULT integrate this figure into the results paragraph
 \begin{figure*}[ht!]
  \begingroup
@ -3183,7 +3201,7 @@ qualitatively observed in the response plot (\cref{fig:doe_responses_mem}).
 Furthermore, the dataset parameter was weakly significant, indicating a possible
 batch effect between the \glspl{doe}. We should also note that despite many
 parameters being significant, this model was still only mediocre in describing
-this response; the $R^2$ was 0.741 but the adjusted $R^2$ was 0.583, indicating
+this response; the $R^2$ was 0.741 but the $R_{adj}^2$ was 0.583, indicating
 that our data might be underpowered for a model this complex. Further
 experiments beyond what was performed here may be needed to fully describe this
 response.
@ -3193,7 +3211,7 @@ We performed linear regression on the other three responses, all of which
 performed much better than the \ptmem{} response as expected given the much
 lower apparent complexity in the response plots
 (\cref{fig:doe_responses_cd4,fig:doe_responses_mem4,fig:doe_responses_ratio}).
-All these models appeared to fit will, with $R^2$ and adjusted $R^2$ upward of
+All these models appeared to fit will, with $R^2$ and $R_{adj}^2$ upward of
 0.8. In all but the CD4:CD8 \ptmem{} ratio, the dataset parameter emerged as
 significant, indicating a batch effect between the \glspl{doe}. All other
 parameters except \pilII{} in the case of CD4:CD8 \ptmem{} ratio were
@ -3216,16 +3234,24 @@ significant predictors.
  \label{fig:doe_sr_contour}
 \end{figure*}
-We then visualized the total \ptmemh{} cells and CD4:CD8 \ptmem{} ratio using
+We then visualized the total \ptmemh{} cells and \rratio{} using the response
-the response explorer in DataModeler to create contour plots around the maximum
+explorer in DataModeler to create contour plots around the maximum responses.
-responses. For both, it appeared that maximizing all three input parameters
+For both, it appeared that maximizing all three input parameters resulted in the
-resulted in the maximum value for either response (\cref{fig:doe_responses}).
+maximum value for either response (\cref{fig:doe_sr_contour}). While not all
-While not all combinations at and around this optimum were tested, the model
+combinations at and around this optimum were tested, the model nonetheless
-nonetheless showed that there were no other optimal values or regions elsewhere
+showed that there were no other optimal values or regions elsewhere in the
-in the model.
+model.
-% TODO this section header sucks
+\subsection{Modeling with artificial intelligence methods reveals potential
-\subsection{AI modeling reveals highly predictive species}
+  CQAs}
 Due to the heterogeneity of the multivariate data collected and knowing that no
 single model is perfect for all applications, we implemented an agnostic
 modeling approach to better understand these \ptmem{} responses. To achieve
 this, a consensus analysis using seven \gls{ml} techniques, \gls{rf}, \gls{gbm},
 \gls{cif}, \gls{lasso}, \gls{plsr}, \gls{svm}, and DataModeler’s \gls{sr}, was
 implemented to molecularly characterize \ptmem{} cells and to extract predictive
 features of quality early in their expansion process.
 \begin{figure*}[ht!]
  \begingroup
@ -3238,34 +3264,38 @@ in the model.
  \label{fig:doe_luminex}
 \end{figure*}
-Due to the heterogeneity of the multivariate data collected and knowing that no
+We collected secretome data via luminex for days 4, 6, 8, 11, and 14.
-single model structure is perfect for all applications, we implemented an
+Plotting the concentrations of these cytokines showed a large variation over all
-agnostic modeling approach to better understand these TN+TCM responses. To
+runs and between different timepoints, demonstrated that these could potentially
-achieve this, a consensus analysis using seven machine learning (ML) techniques,
+be used to differentiate between different process conditions qualitatively
-Random Forest (RF), Gradient Boosted Machine (GBM), Conditional Inference Forest
+simply based on variance (\cref{fig:doe_luminex}). These were also much higher
-(CIF), Least Absolute Shrinkage and Selection Operator (LASSO), Partial
+in most cases that a set of bead based runs which were run in parallel, in
-Least-Squares Regression (PLSR), Support Vector Machine (SVM), and DataModeler’s
+agreement with the luminex data obtained previously in the Grex system (these
-Symbolic Regression (SR), was implemented to molecularly characterize TN+TCM
+data were collected in plates) (\cref{fig:grex_luminex}).
 cells and to extract predictive features of quality early on their expansion
 process (Fig.1d-e).
 % TABLE this table looks like crap, break it up into smaller tables
 \begin{table}[!h] \centering
-  \caption{Results for data-driven modeling}
+  \caption{Results for data-driven modeling using process parameters (PP) with
    only \gls{nmr} on day 4 (N4), only \gls{nmr} on day 6 (N6), only secretome
    on day 6 (S6), or various combindation of each for all seven \gls{ml}
    techniques}
  \label{tab:mod_results}
  \input{../tables/model_results.tex}
 \end{table}
-SR models achieved the highest predictive performance (R2>93\%) when using
+\gls{sr} models achieved the highest predictive performance
-multi-omics predictors for all endpoint responses (\cref{tab:mod_results}). SR
+($R^2$>\SI{93}{\percent}) when using multi-omics predictors for all endpoint
-achieved R2>98\% while GBM tree-based ensembles showed leave-one-out
+responses (\cref{tab:mod_results}). \gls{sr} achieved $R^2$>\SI{98}{\percent}
-cross-validated R2 (LOO-R2) >95\% for CD4+ and CD4+/CD8+ TN+TCM responses.
+while \gls{gbm} tree-based ensembles showed \gls{loocv} $R^2$ >
-Similarly, LASSO, PLSR, and SVM methods showed consistent high LOO-R2, 92.9\%,
+\SI{95}{\percent} for \rmemh{} and \rmemk{} responses. Similarly, \gls{lasso},
-99.7\%, and 90.5\%, respectively, to predict the CD4+/CD8+ TN+TCM. Yet, about
+\gls{plsr}, and \gls{svm} methods showed consistently high \gls{loocv},
-10\% reduction in LOO-R2, 72.5\%-81.7\%, was observed for CD4+ TN+TCM with these
+(\SI{92.9}{\percent}, \SI{99.7}{\percent}, and \SI{90.5}{\percent}
-three methods. Lastly, SR and PLSR achieved R2>90\% while other ML methods
+respectively), to predict the \rratio{}. Yet, about \SI{10}{\percent} reduction
-exhibited exceedingly variable LOO-R2 (0.3\%,RF-51.5\%,LASSO) for CD8+ TN+TCM
+in \gls{loocv}, \SIrange{72.5}{81.7}{\percent}, was observed for \rmemh{} with
-cells.
+these three methods. Lastly, \gls{sr} and \gls{plsr} achieved
 $R^2$>\SI{90}{\percent} while other \gls{ml} methods exhibited exceedingly
 variable \gls{loocv} (\SI{0.3}{\percent} for \gls{rf} to \SI{51.5}{\percent} for
 \gls{lasso}) for \rmemk{}.
 \begin{figure*}[ht!]
  \begingroup
@ -3279,13 +3309,12 @@ cells.
  \label{fig:sr_omics}
 \end{figure*}
-The top-performing technique, SR, showed that the median aggregated predictions
+The top-performing technique, \gls{sr}, showed that the median aggregated
-for CD4+ and CD8+ TN+TCM cells increases when IL2 concentration, IL15, and IL2R
+predictions for \rmemh{} \rmemk{} increases when IL2 concentration, IL15, and
-increase while IL17a decreases in conjunction with other features. These
+IL2R increase while IL17a decreases in conjunction with other features. These
-patterns combined with low values of DMS concentration and GM-CSF uniquely
+patterns combined with low values of \pdms{} and GM-CSF uniquely characterized
-characterized maximum CD8+ TN+TCM. Meanwhile, higher glycine but lower IL13 in
+maximum \rmemk{}. Meanwhile, higher glycine but lower IL13 in combination with
-combination with others showed maximum CD4+ TN+TCM predictions
+others showed maximum \rmemh{} predictions (\cref{fig:sr_omics}).
 (\cref{fig:sr_omics}).
 \begin{figure*}[ht!]
  \begingroup
@ -3306,13 +3335,25 @@ combination with others showed maximum CD4+ TN+TCM predictions
  \label{fig:mod_flower}
 \end{figure*}
-Selecting CPPs and CQAs candidates consistently for T cell memory is desired.
+Selecting \gls{cpp} and \glspl{cqa} candidates consistently for T cell memory is
-Here, \gls{tnfa} was found in consensus across all seven ML methods for predicting
+desired. Here, \gls{tnfa} was found in consensus across all seven \gls{ml}
-CD4+/CD8+ TN+TCM when considering features with the highest importance scores
+methods for predicting \rratio{} when considering features with the highest
-across models (Fig.3a;Methods). Other features, IL2R, IL4, IL17a, and DMS
+importance scores across models (\cref{fig:mod_flower_48r}). Other features,
-concentration, were commonly selected in >=5 ML methods (Fig.3a,c). Moreover,
+IL2R, IL4, IL17a, and \pdms{}, were commonly selected in $\ge$ 5 \gls{ml}
-IL13 and IL15 were found predictive in combination with these using SR
+methods (\cref{fig:mod_flower_48r}). When restricting the models only to include
-(Supp.Table.S4).
+metabolome, formate emerged as the dominant predictor shared across all seven
 models.
 % Moreover, IL13 and IL15 were found predictive in combination
 % with these using \gls{sr} (Supp.Table.S4).
 When performing similar analysis on \rmemh{}, we observe that no species for
 either the secretome or metabolome was agreed upon by all seven models
 (\cref{fig:mod_flower_cd4}). We also observed that these models did not fit as
 well as they did for \rratio{} (\cref{tab:mod_results}). For the secretome, the
 species that were agreed upon by $\ge$ 5 models were IL4, IL17a, and IL2R. For
 the metabolome, formate once again was agreed upon by $\ge$ 5 models as well as
 lactate.
 \begin{figure*}[ht!]
  \begingroup
@ -3338,55 +3379,61 @@ IL13 and IL15 were found predictive in combination with these using SR
  \label{fig:nmr_cors}
 \end{figure*}
 We also investigated the \gls{nmr} features extracted from day of expansion to
 assess if there was any predictive power for \ptmemh{}; in general these models
 had almost as good of fit despite being 2 days earlier in the process
 (\cref{fig:nmr_cors}). Lactate and formate were observed to correlate with each
 other, and both correlated with \rmemh{}. Furthermore, lactate was observed to
 positively correlate with \pdms{} and negatively correlate with glucose
 (\cref{fig:nmr_cors_lactate}). Formate also had the same correlation patterns
 (\cref{fig:nmr_cors_formate}). Glucose was only negatively correlated with
 formate and lactate (\cref{fig:nmr_cors_glucose}). Together, these data suggest
 that lactate, formate, \pdms{}, and \rmemh{} are fundamentally linked.
 \section{discussion}
-% optimization of process features
+\gls{cpp} modeling and understanding are critical to new product development and
-% TODO this sounds like total fluff
+in cell therapy development, it can have life-saving implications. The
-
+challenges for effective modeling grow with the increasing complexity of
-% DISCUSSION integrate figures
+processes due to high dimensionality, and the potential for process interactions
-
+and nonlinear relationships. Another critical challenge is the limited amount of
-CPPs modeling and understanding are critical to new product development and in
+available data, mostly small \gls{doe} datasets. \gls{sr} has the necessary
-cell therapy development, it can have life-saving implications. The challenges
+capabilities to resolve the issues of process effects modeling and has been
-for effective modeling grow with the increasing complexity of processes due to
+applied across multiple industries\cite{Kordona}. \gls{sr} discovers
-high dimensionality, and the potential for process interactions and nonlinear
+mathematical expressions that fit a given sample and differs from conventional
-relationships. Another critical challenge is the limited amount of available
+regression techniques in that a model structure is not defined a
-data, mostly small DOE datasets. SR has the necessary capabilities to resolve
+priori\cite{Koza1992}. Hence, a key advantage of this methodology is that
 the issues of process effects modeling and has been applied across multiple
 industries12. SR discovers mathematical expressions that fit a given sample and
 differs from conventional regression techniques in that a model structure is not
 defined a priori13. Hence, a key advantage of this methodology is that
 transparent, human-interpretable models can be generated from small and large
 datasets with no prior assumptions\cite{Kotancheka}.
 Since the model search process lets the data determine the model, diverse and
-competitive (e.g., accuracy, complexity) model structures are typically
+competitive model structures are typically discovered. An ensemble of diverse
-discovered. An ensemble of diverse models can be formed where its constituent
+models can be formed where its constituent models will tend to agree when
-models will tend to agree when constrained by observed data yet diverge in new
+constrained by observed data yet diverge in new regions. Collecting data in
-regions. Collecting data in these regions helps to ensure that the target system
+these regions helps to ensure that the target system is accurately modeled, and
-is accurately modeled, and its optimum is accurately located\cite{Kotancheka}.
+its optimum is accurately located\cite{Kotancheka}. Exploiting these features
-Exploiting these features allows adaptive data collection and interactive
+allows adaptive data collection and interactive modeling. Consequently, this
-modeling. Consequently, this adaptive-DOE approach is useful in a variety of
+\gls{adoe} approach is useful in a variety of scenarios, including maximizing
-scenarios, including maximizing model validity for model-based decision making,
+model validity for model-based decision making, optimizing processing parameters
-optimizing processing parameters to maximize target yields, and developing
+to maximize target yields, and developing emulators for online optimization and
-emulators for online optimization and human understanding\cite{Kotancheka}.
+human understanding\cite{Kotancheka}.
-% predictive features
+An in-depth characterization of potential \gls{dms} based T cell \glspl{cqa}
-
+includes a list of cytokine and \gls{nmr} features from media samples that are
-An in-depth characterization of potential DMS-based T-cell CQAs includes a list
+crucial in many aspects of T cell fate decisions and effector functions of
-of cytokine and NMR features from media samples that are crucial in many aspects
+immune cells. Cytokine features were observed to slightly improve prediction and
-of T cell fate decisions and effector functions of immune cells. Cytokine
+dominated the ranking of important features and variable combinations when
-features were observed to slightly improve prediction and dominated the ranking
+modeling together with \gls{nmr} media analysis and process parameters
-of important features and variable combinations when modeling together with NMR
+(\cref{fig:mod_flower}).
 media analysis and process parameters (Fig.3b,d).
 Predictive cytokine features such as \gls{tnfa}, IL2R, IL4, IL17a, IL13, and
 IL15 were biologically assessed in terms of their known functions and activities
 associated with T cells. T helper cells secrete more cytokines than T cytotoxic
 cells, as per their main functions, and activated T cells secrete more cytokines
 than resting T cells. It is possible that some cytokines simply reflect the
-CD4+/CD8+ ratio and the activation degree by proxy proliferation. However, the
+\rratio{} and the activation degree by proxy proliferation. However, the exact
-exact ratio of expected cytokine abundance is less clear and depends on the
+ratio of expected cytokine abundance is less clear and depends on the subtypes
-subtypes present, and thus examination of each relevant cytokine is needed.
+present, and thus examination of each relevant cytokine is needed.
 IL2R is secreted by activated T cells and binds to IL2, acting as a sink to
 dampen its effect on T cells\cite{Witkowska2005}. Since IL2R was much greater
@ -3399,22 +3446,23 @@ form, this may either increase or decrease CD4+ ratio and/or memory T cells
 depending on the ratio of the membrane to soluble TNF\cite{Mehta2018}. Since
 only soluble TNF was measured, membrane TNF is needed to understand its impact
 on both CD4+ ratio and memory T cells. Furthermore, IL13 is known to be critical
-for Th2 response and therefore could be secreted if there are significant Th2 T
+for \gls{th2} response and therefore could be secreted if there are significant
-cells already present in the starting population\cite{Wong2011}. This cytokine
+\glspl{th2} already present in the starting population\cite{Wong2011}. This
-has limited signaling in T cells and is thought to be more of an effector than a
+cytokine has limited signaling in T cells and is thought to be more of an
-differentiation cytokine\cite{Junttila2018}. It might be emerging as relevant
+effector than a differentiation cytokine\cite{Junttila2018}. It might be
-due to an initially large number of Th2 cells or because Th2 cells were
+emerging as relevant due to an initially large number of \glspl{th2} or because
-preferentially expanded; indeed, IL4, also found important, is the conical
+\glspl{th2} were preferentially expanded; indeed, IL4, also found important, is
-cytokine that induces Th2 cell differentiation (Fig.3). The role of these
+the conical cytokine that induces \gls{th2} differentiation
-cytokines could be investigated by quantifying the Th1/2/17 subsets both in the
+(\cref{fig:mod_flower}). The role of these cytokines could be investigated by
-starting population and longitudinally. Similar to IL13, IL17 is an effector
+quantifying \glspl{th1}, \glspl{th2}, or \glspl{th17} both in the starting
-cytokine produced by Th17 cells\cite{Amatya2017} thus may reflect the number of
+population and longitudinally. Similar to IL13, IL17 is an effector cytokine
-Th17 subset of T cells. GM-CSF has been linked with activated T cells,
+produced by \glspl{th17}\cite{Amatya2017} thus may reflect the number of
-specifically Th17 cells, but it is not clear if this cytokine is inducing
+\glspl{th17} in the population. GM-CSF has been linked with activated T cells,
 specifically \glspl{th17}, but it is not clear if this cytokine is inducing
 differential expansion of CD8+ T cells or if it is simply a covariate with
 another cytokine inducing this expansion\cite{Becher2016}. Finally, IL15 has
-been shown to be essential for memory signaling and effective in skewing CAR-T
+been shown to be essential for memory signaling and effective in skewing
-cells toward the Tscm phenotype when using membrane-bound IL15Ra and
+\gls{car} T cells toward \glspl{tscm} when using membrane-bound IL15Ra and
 IL15R\cite{Hurton2016}. Its high predictive behavior goes with its ability to
 induce large numbers of memory T cells by functioning in an autocrine/paracrine
 manner and could be explored by blocking either the cytokine or its receptor.
@ -3424,7 +3472,7 @@ activity associated with T cell activation and differentiation, yet it is not
 clear how the various combinations of metabolites relate with each other in a
 heterogeneous cell population. Formate and lactate were found to be highly
 predictive and observed to positively correlate with higher values of total live
-CD4+ TN+TCM cells (Fig.5a-b;Supp.Fig.28-S30,S38). Formate is a byproduct of the
+\rmemh{} cells (~\cref{fig:nmr_cors}). Formate is a byproduct of the
 one-carbon cycle implicated in promoting T cell activation\cite{RonHarel2016}.
 Importantly, this cycle occurs between the cytosol and mitochondria of cells and
 formate excreted\cite{Pietzke2020}. Mitochondrial biogenesis and function are
@ -3432,18 +3480,18 @@ shown necessary for memory cell persistence\cite{van_der_Windt_2012,
  Vardhana2020}. Therefore, increased formate in media could be an indicator of
 one-carbon metabolism and mitochondrial activity in the culture.
-In addition to formate, lactate was found as a putative CQA of TN+TCM. Lactate
+In addition to formate, lactate was found as a putative \gls{cqa} of \ptmem{}
-is the end-product of aerobic glycolysis, characteristic of highly proliferating
+cells. Lactate is the end-product of aerobic glycolysis, characteristic of
-cells and activated T cells\cite{Lunt2011, Chang2013}. Glucose import and
+highly proliferating cells and activated T cells\cite{Lunt2011, Chang2013}.
-glycolytic genes are immediately upregulated in response to T cell stimulation,
+Glucose import and glycolytic genes are immediately upregulated in response to T
-and thus generation of lactate. At earlier time-points, this abundance suggests
+cell stimulation, and thus generation of lactate. At earlier time-points, this
-a more robust induction of glycolysis and higher overall T cell proliferation.
+abundance suggests a more robust induction of glycolysis and higher overall T
-Interestingly, our models indicate that higher lactate predicts higher CD4+,
+cell proliferation. Interestingly, our models indicate that higher lactate
-both in total and in proportion to CD8+, seemingly contrary to previous studies
+predicts higher CD4+, both in total and in proportion to CD8+, seemingly
-showing that CD8+ T cells rely more on glycolysis for proliferation following
+contrary to previous studies showing that CD8+ T cells rely more on glycolysis
-activation\cite{Cao2014}. It may be that glycolytic cells dominate in the
+for proliferation following activation\cite{Cao2014}. It may be that glycolytic
-culture at the early time points used for prediction, and higher lactate
+cells dominate in the culture at the early time points used for prediction, and
-reflects more cells.
+higher lactate reflects more cells.
 % TODO not sure how much I should include here since I didn't do this analysis
 % AT ALL
@ -3461,20 +3509,20 @@ reflects more cells.
 % this looks fine since it is just parroting sources, just need to paraphrase a
 % little
 Metabolites that consistently decreased over time are consistent with the
-primary carbon source (glucose) and essential amino acids (BCAA, histidine) that
+primary carbon source (glucose) and essential amino acids (\gls{bcaa},
-must be continually consumed by proliferating cells. Moreover, the inclusion of
+histidine) that must be continually consumed by proliferating cells. Moreover,
-glutamine in our predictive models also suggests the importance of other carbon
+the inclusion of glutamine in our predictive models also suggests the importance
-sources for certain T cell subpopulations. Glutamine can be used for oxidative
+of other carbon sources for certain T cell subpopulations. Glutamine can be used
-energy metabolism in T cells without the need for glycolysis\cite{Cao2014}.
+for oxidative energy metabolism in T cells without the need for
-Overall, these results are consistent with existing literature that show
+glycolysis\cite{Cao2014}. Overall, these results are consistent with existing
-different T cell subtypes require different relative levels of glycolytic and
+literature that show different T cell subtypes require different relative levels
-oxidative energy metabolism to sustain the biosynthetic and signaling needs of
+of glycolytic and oxidative energy metabolism to sustain the biosynthetic and
-their respective phenotypes\cite{Almeida2016,Wang_2012}. It is worth noting that
+signaling needs of their respective phenotypes\cite{Almeida2016,Wang_2012}. It
-the trends of metabolite abundance here are potentially confounded by the
+is worth noting that the trends of metabolite abundance here are potentially
-partial replacement of media that occurred periodically during expansion
+confounded by the partial replacement of media that occurred periodically during
-(Methods), thus likely diluting some metabolic byproducts (i.e. formate,
+expansion, thus likely diluting some metabolic byproducts (such as formate,
-lactate) and elevating depleted precursors (i.e. glucose, amino acids). More
+lactate) and elevating depleted precursors (such as glucose and amino acids).
-definitive conclusions of metabolic activity across the expanding cell
+More definitive conclusions of metabolic activity across the expanding cell
 population can be addressed by a closed system, ideally with on-line process
 sensors and controls for formate, lactate, along with ethanol and glucose.