From f8c9747d3b6425b420839ff06931b63692318f03 Mon Sep 17 00:00:00 2001 From: Blaise Thompson Date: Thu, 5 Apr 2018 09:47:06 -0500 Subject: 2018-04-05 09:47 --- acquisition/quad.png | Bin 0 -> 132157 bytes acquisition/quad.py | 39 +++++++++++++++++++++++++++++ dissertation.tex | 4 +-- outline.org | 1 - processing/chapter.tex | 15 +++++------ software/chapter.tex | 66 ++++++++++++++++++++++++++++++++----------------- todo.org | 15 +++++++++++ 7 files changed, 108 insertions(+), 32 deletions(-) create mode 100644 acquisition/quad.png create mode 100644 acquisition/quad.py diff --git a/acquisition/quad.png b/acquisition/quad.png new file mode 100644 index 0000000..d400b97 Binary files /dev/null and b/acquisition/quad.png differ diff --git a/acquisition/quad.py b/acquisition/quad.py new file mode 100644 index 0000000..d6519e5 --- /dev/null +++ b/acquisition/quad.py @@ -0,0 +1,39 @@ +# --- import -------------------------------------------------------------------------------------- + + +import os + +import matplotlib.pyplot as plt + +import WrightTools as wt + + +# --- define -------------------------------------------------------------------------------------- + + +here = os.path.abspath(os.path.dirname(__file__)) + + +# --- workspace ----------------------------------------------------------------------------------- + + +# create ax +fig, gs = wt.artists.create_figure(width='double') +ax = plt.subplot(gs[0, 0]) + +ax.set_xlim(-1.1, 1.1) +ax.set_ylim(-1.1, 1.1) + +ax.arrow(-1, 0, 2, 0, lw=5, head_width=0.1) +ax.arrow(0, -1, 0, 2, lw=5, head_width=0.1) + + +es = {} +es['MR-CMDS'] = (-0.5, 0.5) + +for label, coordinates in es.items(): + ax.text(*coordinates, label, fontsize=20) + +# save +p = os.path.join(here, 'quad.png') +wt.artists.savefig(p) diff --git a/dissertation.tex b/dissertation.tex index c7af5d5..cf02bd5 100644 --- a/dissertation.tex +++ b/dissertation.tex @@ -73,8 +73,8 @@ This dissertation is approved by the following members of the Final Oral Committ 
\include{software/chapter} \part{Development} \label{prt:development} -%\include{processing/chapter} -%\include{acquisition/chapter} +\include{processing/chapter} +\include{acquisition/chapter} %\include{active_correction/chapter} %\include{opa/chapter} %\include{mixed_domain/chapter} diff --git a/outline.org b/outline.org index 014c5e6..7d4d9eb 100644 --- a/outline.org +++ b/outline.org @@ -39,7 +39,6 @@ *** TODO wmel diagrams for TG, TG with population transfer *** TG and TA *** derive how TA works out so nicely -* ABANDONED mat: materials * sof: software * pro: processing ** data object model diff --git a/processing/chapter.tex b/processing/chapter.tex index baca84c..c1e46f4 100644 --- a/processing/chapter.tex +++ b/processing/chapter.tex @@ -33,8 +33,7 @@ enough to be a foundational tool. % When creating a toolkit for CMDS, there are several challenges worth considering: \begin{ditemize} - \item Dimensionality of datasets can typically be greater than two, complicating - \textbf{representation}. + \item Dimensionality of datasets can typically be greater than two, complicating representation. \item Shape and dimensionality change, and relevant axes can be different from the scanned dimensions. % \item Data can be awkwardly large-ish (several million pixels), and can become legitimately large @@ -45,10 +44,10 @@ The biggest challenge is to find a really good definition for what constitutes a Once understood, this common denominator can be enshrined into software and built upon. % WrightTools is a software package written in Python, built using the excellent tools provided by -the scientific Python collection of packages, especially Scipy and Numpy. % TODO: cite cite cite +the scientific Python collection of packages, especially Scipy and Numpy. [CITE?] % WrightTools defines a universal file-format that is flexible enough to encompass the diversity of CMDS while still being entirely self-describing. 
% -This file format is based on the popular binary format ``HDF5''. % TODO: cite +This file format is based on the popular binary format ``HDF5''. \cite{h5py} % This format allows for computers to interact with the arrays piece-by-piece in a very fast and reliable way, without loading the entire array in and out of memory. % WrightTools piggybacks on this, allowing users to interact with legitimately large CMDS datasets @@ -59,7 +58,7 @@ coordinate spaces. % WrightTools is written to be used in scripts and in the command line. % It does not have any graphical components built in, except for the ability to generate plots using -matplotlib. % TODO: cite +matplotlib. [CITE?] % Being built in this way gives WrightTools users maximum flexibility, and allows for rapid collaborative development. % It also allows other software packages to use WrightTools as a ``back-end'' foundational software, @@ -77,7 +76,7 @@ To use WrightTools, simply import: 3.0.0 \end{codefragment} I'll discuss more about how exactly WrightTools packaging, distribution, and instillation works in -\autoref{pro:sec:processing_distribution}. +\autoref{pro:sec:distribution}. We can use the builtin Python function \python{dir} to interrogate the contents of the WrightTools package. % @@ -929,6 +928,7 @@ This license is incredibly permissive and puts as few restrictions as possible o Because the license is short, it is reproduced below. % \begin{dquote} + The MIT License (MIT) Copyright (c) 2016-2018 WrightTools Developers. @@ -948,6 +948,7 @@ Because the license is short, it is reproduced below. % NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ \end{dquote} As an aside, since Python is an interpreted language the source code of a library \emph{must} be @@ -957,7 +958,7 @@ However many Python libraries end up being interfaces to compiled code that coul closed-source. % The Scientific Python Stack have MIT-compatible licenses, including BSD-like licenses. % -\subsection{Distribution} % ---------------------------------------------------------------------- +\subsection{Distribution} \label{pro:sec:distribution} % ------------------------------------------ How does WrightTools get onto end-users machines? % Distribution... diff --git a/software/chapter.tex b/software/chapter.tex index b52764b..b235ecb 100644 --- a/software/chapter.tex +++ b/software/chapter.tex @@ -1,6 +1,4 @@ -% TODO: add StoddenVictoria2016a (Enhancing reproducibility for computational methods) - -\chapter{Software} +\chapter{Software} \label{cha:sof} \begin{dquote} The following guidelines are to be used in the documentation of all software developed in the @@ -55,8 +53,8 @@ basic software engineering concepts. % This is in part due to the their general lack of formal training in programming and software development. \textcite{HannayJoErskine2009a} found that over 90\% of scientists learn software development through `informal self study', while \textcite{SegalJudith2004a} mentions that -``[scientists] do not describe themselves as software developers and have little formal education -or training in software development''. HannayJoErskine2009a agrees. JoppaLucasN2013a aggrees. +\emph{``[scientists] do not describe themselves as software developers and have little formal + education or training in software development''}. This lack of training is not in-and-of-itself a problem. % After all, academic scientists are required to be ``do-it-yourself''ers in many contexts for which @@ -105,7 +103,15 @@ Software development ``by-and-for'' scientists poses unique challenges. 
% In this section, I attempt to summarize the literature about these challenges, with a focus on those challenges that I have found most relevant. % -\textbf{Extensibility.} % TODO: cite +\textbf{``End-user developers.''} \cite{SegalJudith2005a, HannayJoErskine2009a, JoppaLucasN2013a} +% TODO: see Joppa ref 17, 21 22 +Typically the developers of scientific software are not trained software developers. % +This is perfectly appropriate, because scientific software development typically requires a large +amount of domain knowledge that only ``end-users'' possess. % +Software development practices may not be valued in a scientific environment. % + +\textbf{Extensibility.} \cite{SegalJudith2005a, CarverJeffreyC2007a, HannayJoErskine2009a, + PrabhuPrakash2011a} Many traditional software development paradigms demand an upfront articulation of goals and requirements. % This allows the developers to carefully design their software, even before a single line of code is @@ -122,13 +128,18 @@ of researchers and a contracted team of software engineers. % \end{dquote} -PrabhuPrakash2011a---lots of good stuff under ``Scientists do not rigorously test their programs'' +Scientific software is \emph{explorative}, and it needs to be flexible and extendable. % +Scientific software developers cannot know what will be required before they set out to try. % +This is probably the most fundamental challenge in such projects, and a big part of why science +cannot simply ``contract out'' a large part of its software development needs. % +Sometimes, a scientific problem is worked out through the iterative process of developing software +to solve it. % -\textbf{Lifetime.} -PrabhuPrakash2011a--- subsection ``long history of software development'' -Challenges with portability, and updating to ``modern standards''. +\textbf{Lifetime.} \cite{CarverJeffreyC2007a, PrabhuPrakash2011a} +Many scientific software projects have long life cycles, measured in decades or more. 
% +Challenges with portability, and updating to ``modern standards''. % -\textbf{Maintenance} +\textbf{Maintenance.} \cite{PrabhuPrakash2011a} Scientific software, especially software maintained by graduate students, tends to be very hard to maintain. % This problem is compounded by the long lifetime of such software, and the poorly defined @@ -138,9 +149,17 @@ written by generation upon generation of student. % Worse, software is sometimes abandoned or left untouched to become a crucial but arcane component of a scientific research project. % -\textbf{Optimization} -PrabhuPrakash2011a: ``scientists do not optimize for the common case'', ``scientists are unaware of -parallelization paradigms'' +\textbf{Testing.} \cite{SandersRebecca2008a, PrabhuPrakash2011a} +Testing is a huge part of software development practices, but many researchers do not engage in +sufficient testing of their software... % +The issue of testing is also consistent with the system of peer review... +Software is not typically peer reviewed... +Especially for domain-specific computational software, determining the ``correct outcome'' to test +against is often infeasible. % + +\textbf{Optimization.} \cite{PrabhuPrakash2011a} +Scientists do not optimize for the common case. % +Scientists are unaware of parallelization paradigms. % \section{Good-enough practices} % ================================================================ @@ -151,7 +170,7 @@ In this section, I attempt to very quickly summarize my personal perspective on software development good---with citations to literature that supports each idea. % These practices are not, generally, \emph{extra work}. % In fact, many of them save massive amounts of time and effort in the long \emph{and} short run, -when properly applied. % +when properly applied. \cite{WilsonGreg2006a} % \textbf{Do not reinvent.} \cite{WilsonGreg2017a} % Before you sit down and implement a piece of software, stop! % @@ -172,7 +191,7 @@ that accepts a set of arguments. 
% If your software package grows to contain multiple files, make those files modular. % As a general rule, once you have two classes you need multiple files. % -\textbf{Choose good data formats.} \cite{WilsonGreg2017a} % +\textbf{Choose good data formats.} \cite{BaxterSusanM2006a, WilsonGreg2017a} % Choose a non-proprietary format if at all possible---remember: you yourself might not have access to the proprietary software in 10 years. % Choose plain text if you can. % @@ -185,7 +204,7 @@ Make sure that it is clear what each piece of data means. % For tabular data, use headers. % Don't forget units. % -\textbf{Use version control.} % +\textbf{Use version control.} \cite{BaxterSusanM2006a, WilsonGreg2006a} % Version control systems allow programmers to save a software package such that they can always return to that save point. % All of the files in the package are saved together. % @@ -205,7 +224,7 @@ reason not to. % If the language you are using has a convention for representing the version programmatically, such as a \python{__version__} attribute in Python, comply with that convention. % -\textbf{Test.} \cite{WilsonGreg2017a} % +\textbf{Test.} \cite{BaxterSusanM2006a, WilsonGreg2006a, WilsonGreg2017a} % As the old saying goes, ``if it's not tested, it's broken''. % If you rely on a piece of functionality in your software, consider writing a test that defines that functionality. % @@ -220,7 +239,7 @@ unless your project becomes very important. % Distribute test datasets, when appropriate. % Remember, your tests can serve double duty as simple minimal examples. % -\textbf{Collaborate and share.} \cite{WilsonGreg2017a, BarnesNick2010a} % +\textbf{Collaborate and share.} \cite{BaxterSusanM2006a, WilsonGreg2017a, BarnesNick2010a} % If you are part of a team, consider sharing software and collaborating to create it. % Try using practices like code review and issue tracking, but don't feel obligated to use them if it doesn't make sense for your project. 
% @@ -232,7 +251,10 @@ Put your software on an open platform, like GitHub \cite{GitHub}, and mint a DOI Cite your software, and ask other people who are using your software to do the same. % Choose a license early, and choose permissive and commercially compatible unless you 1. know what you are doing and 2. plan to enforce. % -% TODO: cite 'publish your code it is good enough' +Afraid to share because your code needs more polish? % +If your software is good enough to be used in active scientific research, it's worth sharing. % +As Nick Barnes says, \emph{``Publish your computer code: it is good enough''}. +\cite{BarnesNick2010a} % \textbf{Write human readable code, and document it well.} \cite{WilsonGreg2017a} % Let the computer do the work, but write the program to be read by a human. % @@ -290,9 +312,9 @@ class Person(): Now I can make some instances of that class, and access their attributes and methods. % \begin{codefragment}{python} >>> mary = Person(name='Mary', favorite_food='pizza', hated_food='falafel') ->>> jane = Person(name='Jane', favorite_food='salad') +>>> jane = Person(name='Jane', favorite_food='salad') >>> mary.react_to('falafel') -'gross---no thank you''' +'gross---no thank you' >>> jane.react_to('salad') 'yum! 
my favorite' >>> mary.favorite_food diff --git a/todo.org b/todo.org index 27c5d7d..a1777e5 100644 --- a/todo.org +++ b/todo.org @@ -1,2 +1,17 @@ +* DONE draft challenges section :software: + CLOSED: [2018-04-05 Thu 09:12] +* TODO "quadrants of complexity" idea :acquisition: +** TODO figure +** TODO arguments +* TODO tables :processing: +* TODO distribution section :processing: +* TODO development section :processing: +* TODO incorporate "most software is not peer reviewed" :software: +** see Joppa reference 6 +* TODO incorporate idea "most people over-trust software" :software: +* TODO incorporate StoddenVictoria2016a :software: +* TODO incorporate all references :software: * TODO quote from RC :introduction: +* TODO insert content :PbSe: +* TODO insert content from SI :mixed_domain: * IDEA 2D delay example from ps system :active: -- cgit v1.2.3