From a6a2dfeccc695a1a23733f64fc1eccdf6bd9cefa Mon Sep 17 00:00:00 2001 From: Blaise Thompson Date: Sat, 24 Mar 2018 21:59:52 -0500 Subject: 2018-03-24 21:59 --- processing/chapter.tex | 124 +++++++++++++++++++++++++++++++++++---- processing/fringes_transform.png | Bin 0 -> 378866 bytes processing/fringes_transform.py | 27 +++++++++ 3 files changed, 141 insertions(+), 10 deletions(-) create mode 100644 processing/fringes_transform.png create mode 100644 processing/fringes_transform.py (limited to 'processing') diff --git a/processing/chapter.tex b/processing/chapter.tex index 28d1858..40055c4 100644 --- a/processing/chapter.tex +++ b/processing/chapter.tex @@ -1,6 +1,25 @@ -\chapter{Processing} - -% TODO: cool quote, if I can think of one +\chapter{Processing} \label{cha:pro} + +\begin{dquote} + What we have is data glut. + What we really want is the ability to manipulate the information and to reach conclusions from + it. + I think we are at the point where that is slipping beyond unaided humans’ abilities. + So the real thing to be looking for is processing schemes. + One way is automatic processing: for instance, the sort of analysis that we saw with the IBM + Watson on Jeopardy. + Putting that in service to humankind in fields that are suffering from data glut at least gives + people who are in charge the ability to keep some sort of track of what is going on. + + The other great thing that we have going for us is that we have billions of very intelligent + people out there in the world. + With the networking that we have now, we’re beginning to see that those large populations, + coordinating amongst themselves, are an intellectual resource that trumps all institutional + intellectual resources and has a real possibility, if it’s supported by the proper automation, of + creating solutions to problems, including the problem of the data glut. 
+ + \dsignature{Vernor Vinge \cite{VingeVernor2012a}} +\end{dquote} \clearpage @@ -290,6 +309,64 @@ Users can configure which files are routed to which from-function. % % TODO (also document on wright.tools) +\subsection{Variables and channels} % ------------------------------------------------------------ + +Data objects are made up of many component channels and variables, each array having the same +dimensionality as its parent data. % +This strategy allows for maximal flexibility in data representation, but it can be overly expensive +if certain arrays do not actually change against all of the dimensions. % +This is often especially true with variables, which typically correspond to scannable hardware that +may not have even moved across some (or any) dimensions. % +To avoid unnecessarily large arrays, WrightTools allows Channels and Variables to have different +sizes than the parent data. % +As an example, consider the following object. +\begin{codefragment}{bash} % TODO: need to use bash here because of box characters :-( +>>> import WrightTools as wt; from WrightTools import datasets +>>> data = wt.data.from_COLORS(datasets.COLORS.v2p1_MoS2_TrEE_movie) +>>> data.print_tree() +MoS2 (/tmp/qhg_1b3l.wt5) +├── axes +│ ├── 0: w2 (nm) (41, 1, 1) +│ ├── 1: w1=wm (nm) (1, 41, 1) +│ └── 2: d2 (fs) (1, 1, 23) +├── variables +│ ├── 0: w2 (nm) (41, 1, 1) +│ ├── 1: w1 (nm) (1, 41, 1) +│ ├── 2: wm (nm) (1, 41, 1) +│ ├── 3: d2 (fs) (1, 1, 23) +│ ├── 4: w3 (nm) (1, 1, 1) +│ ├── 5: d0 (fs) (1, 1, 1) +│ └── 6: d1 (fs) (1, 1, 1) +└── channels + ├── 0: ai0 (41, 41, 23) + ├── 1: ai1 (41, 41, 23) + ├── 2: ai2 (41, 41, 23) + ├── 3: ai3 (41, 41, 23) + ├── 4: ai4 (41, 41, 23) + └── 5: mc (41, 41, 23) +\end{codefragment} +Note that this is the primary dataset discussed in \autoref{cha:mx2}. % +The shape of this data object is \python{(41, 41, 23)}, but none of the variables have that full +shape. 
% +From a quick inspection, one can see that \python{w1} and \python{wm} were scanned together, while +\python{w2} and \python{d2} were the other two dimensions. % +\python{w3}, \python{d0}, and \python{d1} were not moved at all, yet their coordinates are still +propagated. % +The axes have the joint shape of their component variables. % +Although not shown in this example, channels also may have axes with length 1. + +Axes, variables, and channels are array-likes, so they support slicing operations. % +In addition, all three classes have \python{points} and \python{full} attributes that return the +squeezed and broadcasted array, respectively. % + +\begin{figure} + \includegraphics[width=\textwidth]{"processing/fringes_transform"} + \includepython{"processing/fringes_transform.py"} + \caption[CAPTION TODO]{ + CAPTION TODO} +\end{figure} + + \subsection{Math} % ------------------------------------------------------------------------------ Now that we know the basics of how the WrightTools \python{Data} class stores data, it's time to do @@ -300,7 +377,7 @@ Let's start with some elementary algebra. % In Python, operators are symbols that carry out some computation. % Consider the following: -\begin{codefragment}{python, label=abcdefg} +\begin{codefragment}{python, label=pro:lst:array_addition} >>> import numpy as np >>> a = np.array([4, 5, 6]) >>> b = np.array([-1, -2, -3]) @@ -312,7 +389,7 @@ Here, \python{a} and \python{b} are operands and \python{+} is an operator. % When used in this simple way, operators typically create and return a \emph{new} object in the computers memory. % We can verify this by using Python's built-in \python{id} function on the objects created in -\ref{abcdefg}. % +\ref{pro:lst:array_addition}. % \begin{codefragment}{python} >>> id(a), id(b), id(c) (139712529580400, 139712333712320, 139712333713040) @@ -321,12 +398,39 @@ This is usually fine, but sometimes the operands are unwieldy large objects that memory to store. 
% In other cases operators are used millions of times such that, used as above, millions of new arrays will be created. % -One way to avoid these problems is to use \emph{in-place} operators - -Because the \python{Data} object is mostly stored outside of memory, it is better to do -in-place... % TODO -Broadcasting... % TODO +One way to avoid these problems is to use \emph{in-place} operators. % +Using a slightly different syntax, one can tell Python to overwrite one of the operands with the +new value. % +Continuing from \ref{pro:lst:array_addition}: +\begin{codefragment}{python, label=pro:lst:in_place_addition} +>>> a += b +>>> a +array([3, 3, 3]) +\end{codefragment} +No output \python{c} array was created, so no additional memory footprint is needed in +\ref{pro:lst:in_place_addition}. % +Since WrightTools channels and variables are typically large arrays, and since these arrays are +stored on disk inside of a larger file, WrightTools requires the use of in-place operators for all +normal math. % +Currently WrightTools supports addition (\python{+=}), multiplication (\python{*=}), +power (\python{**=}), subtraction (\python{-=}), and division (\python{/=}). % +As an example, consider dividing a channel by a specific factor: +\begin{codefragment}{python} +>>> import WrightTools as wt; from WrightTools import datasets +>>> data = wt.data.from_JASCO(datasets.JASCO.PbSe_batch_1) +data.created at /tmp/tdyvfxu8.wt5::/ + range: 2500.0 to 700.0 (nm) + size: 1801 +>>> data.signal + +>>> data.signal.min(), data.signal.max() +(0.10755, 1.58144) +>>> data.signal /= 2 +>>> data.signal.min(), data.signal.max() +(0.053775, 0.79072) +\end{codefragment} +Variables also support in-place operators. 
% \subsubsection{Clip} diff --git a/processing/fringes_transform.png b/processing/fringes_transform.png new file mode 100644 index 0000000..4d400d2 Binary files /dev/null and b/processing/fringes_transform.png differ diff --git a/processing/fringes_transform.py b/processing/fringes_transform.py new file mode 100644 index 0000000..1f80a86 --- /dev/null +++ b/processing/fringes_transform.py @@ -0,0 +1,27 @@ +import os +import matplotlib.pyplot as plt +import WrightTools as wt; from WrightTools import datasets + +here = os.path.abspath(os.path.dirname(__file__)) + +data = wt.data.from_PyCMDS(datasets.PyCMDS.w2_w1_000) +data.signal_mean.symmetric_root(0.5) # to amplitude level +data.convert('wn') + +fig, gs = wt.artists.create_figure(width='double', cols=[1, 1, 'cbar']) +# as taken +ax = plt.subplot(gs[0, 0]) +ax.pcolor(data) +wt.artists.set_ax_labels(xlabel=data.w2.label, ylabel=data.w1.label) +ax.grid(); ax.set_title('as taken', fontsize=20) +# transformed +ax = plt.subplot(gs[0, 1]) +data.transform('wm', 'w1') +data.convert('wn') +ax.pcolor(data) +wt.artists.set_ax_labels(xlabel=data.wm.label, yticks=False) +ax.grid(); ax.set_title('transformed', fontsize=20) +# colorbar +wt.artists.plot_colorbar(plt.subplot(gs[0, -1]), label='amplitude') +# save +wt.artists.savefig(os.path.join(here, 'fringes_transform.png')) -- cgit v1.2.3