/* * data_file.cc -- ePiX::data_file class * * This file is part of ePiX, a C++ library for creating high-quality * figures in LaTeX * * Version 1.2.0-2 * Last Change: September 26, 2007 */ /* * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 * Andrew D. Hwang * Department of Mathematics and Computer Science * College of the Holy Cross * Worcester, MA, 01610-2395, USA */ /* * ePiX is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * ePiX is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public * License for more details. * * You should have received a copy of the GNU General Public License * along with ePiX; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include "errors.h" #include "functions.h" #include "triples.h" #include "path.h" #include "spline.h" #include "label_data.h" #include "curves.h" #include "interval.h" #include "data_bins.h" #include "data_mask.h" #include "data_file.h" namespace ePiX { const std::string default_delim("\t"); const std::string default_commt("%"); // magic number -- precision for data_file.write() const unsigned int PRECISION(6); double truncate(double arg, const unsigned int n) { if (fabs(arg) < pow(0.1, n)) return 0; return arg; } // warning message for data_file::tokenize void non_parsable(const std::string& msg) { std::stringstream obuf; obuf << "data_file::tokenise():" << std::endl << " Non-parsable input \"" << msg << "\", setting value to 0"; epix_warning(obuf.str()); } //// private data_file functions //// // returns # of columns, i.e. # of entries in first parsable row unsigned int data_file::entries(const char* filename) { std::ifstream input(filename); if (!input) { epix_warning("Cannot open file \""+std::string(filename)+"\""); return(0); } // else std::string linebuf; std::vector tmp; while (getline(input, linebuf) && tmp.size() == 0) { // strip comments linebuf = linebuf.substr(0, linebuf.find_first_of(m_commt)); if (linebuf.length() > 0) tmp = tokenise(linebuf); } input.close(); if (tmp.size() == 0) { std::stringstream obuf; // solely for code readability :) obuf << "File \"" << std::string(filename) << "\" contains no parsable data"; epix_warning(obuf.str()); } return tmp.size(); } // end of entries(const char*) // private function: // Tokenise line using our delimiter, return a vector of doubles std::vector data_file::tokenise(std::string line) { size_t pos(line.find(m_delim, 0)); // first delimiter std::string tmpStr; // current chunk of input double tmpDbl; // current parsed chunk std::vector tmpVec; // output found so far // read current line into a vector of doubles while(pos != std::string::npos) { tmpStr = line.substr(0, pos); line.erase(0, pos+1); std::istringstream convStr(tmpStr); if (!(convStr >> tmpDbl)) { tmpDbl = 0; non_parsable(convStr.str()); } tmpVec.push_back(tmpDbl); pos = line.find(m_delim, 0); } if (line.size()) // There's input remaining { std::istringstream convStr(line); if (!(convStr >> tmpDbl)) { tmpDbl = 0; non_parsable(convStr.str()); } else tmpVec.push_back(tmpDbl); } return tmpVec; } //// public data_file functions //// data_file::data_file(unsigned int n) : m_precision(PRECISION), m_data(n), m_delim(default_delim), m_commt(default_commt) { } // provide two version to avoid exposing default delim, commt data_file::data_file(const char* filename, const std::string& delim, const std::string& commt ) : m_precision(PRECISION), m_delim(delim), m_commt(commt) { // get number of columns by parsing first line of file const unsigned int N(entries(filename)); m_data.resize(N); if (N > 0) // file contains data read(filename); } // end of data_file() // filename-only version data_file::data_file(const char* filename) : m_precision(PRECISION), m_delim(default_delim), m_commt(default_commt) { // get number of columns by parsing first line of file const unsigned int N(entries(filename)); m_data.resize(N); if (N > 0) // file contains data read(filename); } // end of data_file(const char*) // file made from components data_file::data_file(double f(double), double t_min, double t_max, unsigned int num_pts) : m_precision(PRECISION), m_data(1), m_delim(default_delim), m_commt(default_commt) { const double dt((t_max - t_min)/num_pts); for (unsigned int i=0; i<= num_pts; ++i) m_data.at(0).push_back(f(t_min+i*dt)); } data_file::data_file(double f1(double), double f2(double), double t_min, double t_max, unsigned int num_pts) : m_precision(PRECISION), m_data(2), m_delim(default_delim), m_commt(default_commt) { const double dt((t_max - t_min)/num_pts); for (unsigned int i=0; i<= num_pts; ++i) { m_data.at(0).push_back(f1(t_min+i*dt)); m_data.at(1).push_back(f2(t_min+i*dt)); } } data_file::data_file(double f1(double), double f2(double), double f3(double), double t_min, double t_max, unsigned int num_pts) : m_precision(PRECISION), m_data(3), m_delim(default_delim), m_commt(default_commt) { const double dt((t_max - t_min)/num_pts); for (unsigned int i=0; i<= num_pts; ++i) { m_data.at(0).push_back(f1(t_min+i*dt)); m_data.at(1).push_back(f2(t_min+i*dt)); m_data.at(2).push_back(f3(t_min+i*dt)); } } data_file& data_file::read(const char* filename) { unsigned int columns(entries(filename)); if (columns != m_data.size()) { std::stringstream msg; msg << "Column count mismatch in file " << filename; epix_warning(msg.str()); } else if (0 < columns) { std::ifstream input(filename); std::string linebuf; std::vector line; bool warned(false); while (getline(input, linebuf)) { // strip comments linebuf = linebuf.substr(0, linebuf.find_first_of(m_commt)); if (linebuf.length() > 0) { line = tokenise(linebuf); if (line.size() > m_data.size()) { if (!warned) { epix_warning("File has more columns than allocated"); warned = true; } } else if (line.size() < m_data.size()) { if (!warned) { epix_warning("File has fewer columns than allocated"); warned = true; } } else { for (unsigned int i = 0; i < m_data.size(); i++) m_data.at(i).push_back(line.at(i)); } } // linebuf non-empty } // end of file input.close(); } return *this; } // end of data_file::read(const char*, const std::string&) // transform column(s) data_file& data_file::transform(double f(double), unsigned int col) { unsigned int rows(m_data.at(0).size()); if (0 < col) // apply to selected column for (unsigned int i=0; i::iterator> iter(m_data.size()); for (unsigned int i = 0; i < m_data.size(); i++) iter.at(i) = m_data.at(i).begin(); while (iter.at(0) != m_data.at(0).end()) { if ( dm.masks(*iter.at(col-1)) ) for (unsigned int j = 0; j < m_data.size(); j++) m_data.at(j).erase(iter.at(j)); else for (unsigned int j = 0; j < m_data.size(); j++) iter.at(j)++; } return *this; } data_file& data_file::prune(const interval& range, const unsigned int col) { data_mask dm(range); return prune(dm, col); } data_file& data_file::prune(double rmin, double rmax, const unsigned int col) { data_mask dm(rmin, rmax); return prune(dm, col); } // (col1|col2) double data_file::dot(unsigned int col1, unsigned int col2) const { double sum(0); if ((col1 > m_data.size()) || (col2 > m_data.size()) ) epix_warning("Invalid column index in dot product"); else for (unsigned int i=0; i < m_data.at(0).size(); ++i) sum += m_data.at(col1-1).at(i)*m_data.at(col2-1).at(i); return sum; } // end of dot // avg (mean) of col1 double data_file::avg(unsigned int col1) const { double sum(0); if (col1 > m_data.size()) epix_warning("Invalid column index in mean"); else for (unsigned int i=0; i < m_data.at(0).size(); ++i) sum += m_data.at(col1-1).at(i); return sum/m_data.at(0).size(); } // end of avg // variance (x|x) - n*\bar{x}^2 double data_file::var(unsigned int col1) const { double mean(avg(col1)); return dot(col1, col1) - mean*mean*m_data.at(0).size(); } // end of var // covariance (x|y) - n*\bar{x}*\bar{y} double data_file::covar(unsigned int col1, unsigned int col2) const { return dot(col1, col2) - (m_data.at(0).size())*avg(col1)*avg(col2); } // end of covar void data_file::regression(unsigned int col1, unsigned int col2) const { Line(P(avg(col1), avg(col2)), covar(col1, col2)/var(col1)); } //// Output functions //// // return selected column std::vector data_file::column(unsigned int n) const { if (1 <= n && n <= m_data.size()) return m_data.at(n-1); else { if (0 < m_data.size()) { epix_warning("Out of range argument to data_file::column()"); return m_data.at(0); } else { epix_warning("data_file::column() requested for empty file"); std::vector err_val(1, 0); return err_val; } } } // apply f to selected column std::vector data_file::column(double f(double), unsigned int n) const { // must copy data std::vector value(m_data.at(0).size()); unsigned int col(n); if (col < 1 || m_data.size() < col) { if (0 < m_data.size()) { epix_warning("Out of range argument to data_file::column()"); col=1; } else { epix_warning("data_file::column() requested for empty file"); std::vector err_val(1, 0); return err_val; } } for (unsigned int i=0; i data(num_entries); // create path for (unsigned int i=0; i < num_entries; ++i) try { if (col3 == 0) data.at(i) = f(m_data.at(col1-1).at(i), m_data.at(col2-1).at(i), 0); else data.at(i) = f(m_data.at(col1-1).at(i), m_data.at(col2-1).at(i), m_data.at(col3-1).at(i)); } catch (std::out_of_range) { epix_warning("data_file::plot(): Invalid column index"); return; } // and write it if (PATH == TYPE) // N.B. Unaffected by "select" { path temp(data, false, false); // not closed or filled temp.draw(); } else for (unsigned int i=0; i < num_entries; ++i) { // if (m_select(data.at(i))) label_data temp(data.at(i), TYPE); temp.draw(); } } void data_file::plot(epix_mark_type TYPE, P f(double, double, double), unsigned int col1, unsigned int col2, unsigned int col3) const { plot(TYPE, col1, col2, col3, f); } //// global functions //// void plot(const char* filename, epix_mark_type TYPE, unsigned int col1, unsigned int col2, unsigned int col3, P f(double, double, double)) { data_file temp(filename); temp.plot(TYPE, col1, col2, col3, f); } void plot(const char* filename, epix_mark_type TYPE, P f(double, double, double), unsigned int col1, unsigned int col2, unsigned int col3) { data_file temp(filename); temp.plot(TYPE, col1, col2, col3, f); } // pass 3rd arg by value void histogram(const char* filename, unsigned int col, data_bins db, double scale) { data_file temp(filename); db.read(temp.column(col)); db.histogram(scale); } void bar_chart(const char* filename, unsigned int col, data_bins db, double scale) { data_file temp(filename); db.read(temp.column(col)); db.bar_chart(scale); } } // end of namespace