Logo Search packages:      
Sourcecode: bibledit version File versions  Download package

utilities.cpp

/*
    Copyright (C) 2003-2006 Teus Benschop.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/


#include "libraries.h"
#include "utilities.h"
#include <libgen.h>
#include <glib.h>
#include <config.h>
#include "constants.h"
#include <algorithm>
#include <iterator>
#include <functional>


ustring trim (const ustring & s)
{
  // Returns a copy of s without leading and trailing whitespace.
  // Whitespace is: space, tab, new line and carriage return.
  // The same set is used at both ends. Formerly only spaces and tabs were
  // stripped at the start, so that leading new lines were kept, and
  // a string like " \n" was returned as "\n" instead of "".
  const char *whitespace = " \t\n\r";
  size_t beg = s.find_first_not_of (whitespace);
  // Empty, or whitespace only.
  if (beg == string::npos)
    return "";
  // There is at least one non-whitespace character, so end is valid too.
  size_t end = s.find_last_not_of (whitespace);
  return ustring (s, beg, end - beg + 1);
}


ustring number_in_string (const ustring & str)
{
  // Returns the first uninterrupted run of decimal digits found in str.
  // An empty string is returned if str has no digit at all.
  const char *digits = "0123456789";
  const size_t first_digit = str.find_first_of (digits);
  if (first_digit == string::npos)
    return ustring ();
  const size_t after_digits = str.find_first_not_of (digits, first_digit);
  // The digits run up to the end of the string.
  if (after_digits == string::npos)
    return str.substr (first_digit);
  return str.substr (first_digit, after_digits - first_digit);
}


ustring upperCase (const ustring & s)
{
  // Returns an uppercase copy of s.
  // The conversion is done per char through toupper ().
  string upper (s);
  // The loop runs over the copy that is being modified, not over s.
  for (size_t i = 0; i < upper.length (); ++i) {
    // toupper () is undefined for negative values other than EOF,
    // so the char is passed as an unsigned char.
    upper[i] = toupper (static_cast <unsigned char> (upper[i]));
  }
  return upper;
}


ustring lowerCase (const ustring & s)
{
  // Returns a lowercase copy of s.
  // The conversion is done per char through tolower ().
  string lower (s);
  // The loop runs over the copy that is being modified, not over s.
  for (size_t i = 0; i < lower.length (); ++i) {
    // tolower () is undefined for negative values other than EOF,
    // so the char is passed as an unsigned char.
    lower[i] = tolower (static_cast <unsigned char> (lower[i]));
  }
  return lower;
}


ustring remove_spaces (const ustring & s)
{
  // Returns a copy of s with every space character taken out.
  // Other whitespace, like tabs, is left as it is.
  ustring stripped;
  size_t start = 0;
  for (size_t hit = s.find (" "); hit != string::npos; hit = s.find (" ", start)) {
    // Copy the stretch in front of the space, then skip the space.
    stripped.append (s, start, hit - start);
    start = hit + 1;
  }
  // Whatever follows the last space.
  stripped.append (s, start, string::npos);
  return stripped;
}


unsigned int convert_to_int (const ustring & str)
{
  unsigned int i = 0;
  istringstream r (str);
  r >> i;
  return i;
}


ustring convert_to_string (int i)
{
  // Formats the integer through an output string stream.
  ostringstream formatter;
  formatter << i;
  const ustring text (formatter.str ());
  return text;
}


ustring convert_to_string (unsigned int &i)
{
  // Formats the unsigned integer through an output string stream.
  // The value is streamed as it is. It used to be converted to int first,
  // which made values above INT_MAX come out as negative numbers.
  // The argument is not modified.
  ostringstream r;
  r << i;
  return r.str ();
}


ustring convert_to_string (bool b)
{
  // True becomes "1", false becomes "0".
  return b ? "1" : "0";
}


ustring convert_to_string (double d)
{
  // Formats the double through an output string stream,
  // using the default formatting of that stream.
  ostringstream formatter;
  formatter << d;
  const ustring text (formatter.str ());
  return text;
}


bool convert_to_bool (const ustring & s)
{
  // The empty string, "0" and "F" mean false.
  // Anything else means true.
  const bool means_false = (s == "") || (s == "0") || (s == "F");
  return !means_false;
}


double convert_to_double (const ustring & s)
{
  // Reads a double from the start of s by stream extraction.
  // The result starts out as 0, so that input from which nothing can be
  // read, like an empty string, gives 0 instead of an uninitialized value.
  double result = 0;
  istringstream i (s);
  i >> result;
  return result;
}


static ustring shell_single_quote (const ustring & text)
// Puts text between single quotes for use in a shell command.
// Each single quote inside text is written as '\'' so that it cannot end
// the quoting.
{
  ustring quoted ("'");
  size_t start = 0;
  size_t hit = text.find ("'");
  while (hit != string::npos) {
    quoted.append (text, start, hit - start);
    quoted.append ("'\\''");
    start = hit + 1;
    hit = text.find ("'", start);
  }
  quoted.append (text, start, string::npos);
  quoted.append ("'");
  return quoted;
}


bool string_in_file (const ustring & filename, const ustring & line)
{
  /* 
     Returns true if grep finds "line" in file "filename".
     Note that grep takes "line" as a pattern, not as a fixed string.
     False is returned too if grep fails, e.g. if the file does not exist.
     Both arguments are quoted for the shell, so that a single quote in them
     cannot break or extend the command. The pattern is passed through -e and
     the filename after --, so that a leading dash is not taken as an option.
   */
  ustring command =
    "grep --quiet -e " + shell_single_quote (line) + " -- " +
    shell_single_quote (filename) + " > /dev/null 2>&1";
  int i = system (command.c_str ());    // Will return exitstatus 0 if a match was found.
  return (i == 0);
}


ustring gw_build_filename (const ustring& part1, const ustring& part2)
// Joins two path elements through g_build_filename and returns the result
// as a ustring, so that the caller does not need to free anything.
{
  gchar *built = g_build_filename (part1.c_str(), part2.c_str(), NULL);
  ustring joined (built);
  // The copy has been taken; release what glib allocated.
  g_free (built);
  return joined;
}


ustring gw_build_filename (const ustring& part1, const ustring& part2, const ustring& part3)
// Joins three path elements through g_build_filename and returns the result
// as a ustring, so that the caller does not need to free anything.
{
  gchar *built = g_build_filename (part1.c_str(), part2.c_str(), part3.c_str(), NULL);
  ustring joined (built);
  // The copy has been taken; release what glib allocated.
  g_free (built);
  return joined;
}


ustring gw_build_filename (const ustring& part1, const ustring& part2, const ustring& part3, const ustring& part4)
// Joins four path elements through g_build_filename and returns the result
// as a ustring, so that the caller does not need to free anything.
{
  gchar *built = g_build_filename (part1.c_str(), part2.c_str(), part3.c_str(), part4.c_str(), NULL);
  ustring joined (built);
  // The copy has been taken; release what glib allocated.
  g_free (built);
  return joined;
}


ustring usfm_extract (ustring & line)
{
  /*
     This returns the usfm at the start of the line, e.g. \id.
     The usfm runs from the backslash up to, but not including, the first
     space or asterisk. It is removed from the line, and what remains of the
     line is trimmed.
     If the trimmed line does not start with a backslash, or is shorter than
     two characters, an empty string is returned and the line is left as it is.
   */
  ustring s = trim (line);
  ustring returnvalue;
  if (s.length () > 1) {
    if (s.substr (0, 1) == "\\") {
      size_t endposition = s.find_first_of (" *", 1);
      if (endposition == string::npos) {
        // The whole line is the usfm.
        line = "";
        returnvalue = s;
      } else {
        returnvalue = s.substr (0, endposition);
        // The end position was found in the trimmed copy, so the remainder
        // is taken from that copy too. Erasing from the untrimmed line cut
        // at the wrong place if the line had leading whitespace.
        line = trim (s.substr (endposition));
      }
    }
  }
  return returnvalue;
}


ustring usfm_extract_marker (ustring & line)
{
  // Takes the usfm off the start of the line through usfm_extract,
  // and returns it without its leading backslash, so e.g. id.
  // An empty string is returned if no usfm was extracted.
  ustring marker = usfm_extract (line);
  if (!marker.empty ())
    marker.erase (0, 1);        // Drop the backslash.
  return marker;
}


ustring usfm_extract_within_line (ustring & line)
/*
This returns the first usfm found anywhere in the line, e.g. \id.
The line is trimmed first. Any text before the backslash is discarded.
The usfm that is returned includes the space or asterisk that ends it, 
if there is one. The usfm is removed from the line.
If the line has no backslash, an empty string is returned.
*/
{
  line = trim (line);
  const size_t backslash = line.find ("\\");
  if (backslash == string::npos)
    return ustring ();
  // Throw away what comes before the usfm.
  line.erase (0, backslash);
  ustring usfm;
  const size_t terminator = line.find_first_of (" *", 1);
  if (terminator == string::npos) {
    // The usfm runs up to the end of the line.
    usfm = line;
    line.clear();
  } else {
    // Take the terminating character along.
    usfm = line.substr (0, terminator + 1);
    line.erase (0, terminator + 1);
  }
  return usfm;
}


ustring usfm_extract_marker_within_line (ustring & line)
/*
Searches the line for a usfm through usfm_extract_within_line, removes it 
from the line, and returns it trimmed and without its leading backslash, 
e.g. id.
An empty string is returned if nothing was found.
*/
{
  ustring marker = trim (usfm_extract_within_line (line));
  if (!marker.empty ())
    marker.erase (0, 1);        // Drop the backslash.
  return marker;
}


vector<ustring> usfm_split_on_main_markers (const ustring& line)
/*
Splits the line in front of each of the main markers: \id, \c and \v, 
each followed by a space.
Returns the line as it is if there is nothing to split on, or else the parts.
Each part but the first starts with its marker.
*/
{
  static const char *const main_markers[] = { "\\id ", "\\c ", "\\v " };
  const size_t marker_count = sizeof (main_markers) / sizeof (main_markers[0]);

  // Collect the cutting positions for all markers.
  // The search starts at offset 1: a marker at the very start of the line 
  // does not give a cut.
  vector<size_t> cuts;
  for (size_t m = 0; m < marker_count; m++) {
    size_t hit = line.find (main_markers[m], 1);
    while (hit != string::npos) {
      cuts.push_back (hit);
      hit = line.find (main_markers[m], hit + 1);
    }
  }

  // The markers were searched for one after the other, so put the cuts in 
  // the order in which they occur in the line.
  sort (cuts.begin(), cuts.end());

  vector<ustring> parts;
  if (cuts.empty()) {
    // No cuts: the line is the only part.
    parts.push_back (line);
    return parts;
  }

  // The end of the line closes the last part.
  cuts.push_back (line.length());
  size_t part_start = 0;
  for (size_t i = 0; i < cuts.size(); i++) {
    const size_t part_end = cuts[i];
    parts.push_back (line.substr (part_start, part_end - part_start));
    part_start = part_end;
  }
  return parts;
}


vector<ustring> usfm_get_all_markers (const ustring& line)
// Returns the markers found in the line, in the order they occur.
// It works on a copy of the line, and keeps extracting markers from it 
// until the extraction gives an empty marker.
{
  vector<ustring> markers;
  ustring remainder (line);
  for (;;) {
    const ustring marker = usfm_extract_marker_within_line (remainder);
    if (marker.empty())
      break;
    markers.push_back (marker);
  }
  return markers;
}


// Tables with the identifiers and names of the Bible books.
// The initializers below list 66 books each, in the same order, so that the 
// same index refers to the same book in each table. id_to_biblebook_english 
// relies on this for USFM_IDs and EnglishBookNames.
// NUMBER_OF_BIBLEBOOKS is defined outside this file.

// The USFM book identifiers, in uppercase.
char *USFM_IDs[NUMBER_OF_BIBLEBOOKS] =
  { "GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT", "1SA", "2SA",
    "1KI", "2KI", "1CH", "2CH", "EZR", "NEH", "EST", "JOB", "PSA", "PRO",
    "ECC", "SNG", "ISA", "JER", "LAM", "EZK", "DAN", "HOS", "JOL", "AMO", "OBA",
    "JON", "MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL",
    "MAT", "MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL", "EPH", "PHP",
    "COL", "1TH", "2TH", "1TI", "2TI", "TIT", "PHM", "HEB", "JAS",
    "1PE", "2PE", "1JN", "2JN", "3JN", "JUD", "REV"
};
// The OSIS book identifiers.
char *OSIS_IDs[NUMBER_OF_BIBLEBOOKS] =
  { "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth", "1Sam",
    "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh", "Esth", "Job",
    "Ps", "Prov", "Eccl", "Song", "Isa", "Jer", "Lam", "Ezek", "Dan", "Hos",
    "Joel", "Amos", "Obad", "Jonah", "Mic", "Nah", "Hab", "Zeph",
    "Hag", "Zech", "Mal",
    "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal", "Eph",
    "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Titus",
    "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", "3John", "Jude",
    "Rev"
};
// The full English names of the books.
char *EnglishBookNames[NUMBER_OF_BIBLEBOOKS] =
  { "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua",
    "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings",
    "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Esther", "Job",
    "Psalms", "Proverbs", "Ecclesiastes", "Song of Solomon", "Isaiah",
    "Jeremiah", "Lamentations", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos",
    "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah",
    "Haggai", "Zechariah", "Malachi",
    "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians",
    "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians",
    "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus",
    "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John",
    "2 John", "3 John", "Jude", "Revelation"
};


ustring id_to_biblebook_english (const ustring & id)
{
  // Takes a USFM id and returns the English bookname.
  // E.g. GEN returns "Genesis".
  // Returns "Unknown" if the id is not in USFM_IDs.
  // Note: Allow for mixed case IDs for extra robustness.
  // The id is converted to uppercase once, instead of once per book.
  const ustring wanted = upperCase (id);
  for (unsigned int i = 0; i < NUMBER_OF_BIBLEBOOKS; i++)
    {
      if (wanted == USFM_IDs[i])
        return EnglishBookNames[i];
    }
  return "Unknown";
}


void xml_handle_entities (ustring& line, vector <size_t>& positions) 
/*
  Changes the < and the > in the text to the xml entities &lt; and &gt
  Changes the ampersand (&) to &amp;
  Any positions affected by this will be adjusted.
*/
{
  size_t offposition;
  // Deal with &. This one is done first, 
  // else the ampersands inserted later will get changed too.
  offposition  = line.find ("&");
  while (offposition != string::npos)
  {
    line.replace (offposition, 1, "&amp;");
    // Do not repeat on the & just removed and inserted, but start searching after it.
    offposition = line.find ("&", offposition + 3);
  }  
  // Deal with <
  offposition  = line.find ("<");
  while (offposition != string::npos)
  {
    line.replace (offposition, 1, "&lt;");
    offposition = line.find ("<", offposition);
  }  
  // Deal with >
  offposition  = line.find (">");
  while (offposition != string::npos)
  {
    line.replace (offposition, 1, "&gt;");
    offposition = line.find (">", offposition);
  }  
}


ustring xml_text_embed_in_tags (int level, const ustring& tag, const ustring& text)
{
  // Returns <tag>text</tag>, indented with two spaces per level.
  // The <, > and & in the text are changed to xml entities first.
  // The tag itself is used as it is.
  ustring escaped (text);
  vector <size_t> unused_positions;
  xml_handle_entities (escaped, unused_positions);
  // Indentation.
  ustring element;
  for (int depth = level; depth > 0; depth--)
    element.append ("  ");
  // Opening tag, text, closing tag.
  element.append ("<");
  element.append (tag);
  element.append (">");
  element.append (escaped);
  element.append ("</");
  element.append (tag);
  element.append (">");
  return element;
}


ustring xml_tag (int level, const ustring tag, bool closing)
{
  // Returns the opening tag <tag>, or the closing tag </tag> if "closing" 
  // is set, indented with two spaces per level.
  ustring element;
  int depth = 0;
  while (depth < level) {
    element.append ("  ");
    depth++;
  }
  element.append (closing ? "</" : "<");
  element.append (tag);
  element.append (">");
  return element;  
}


void output_xml_message (const ustring& book, const ustring& chapter, const ustring& verse, const ustring& message)
{
  cout << xml_tag (0, MESSAGE_TAG, false) << endl;
  cout << xml_text_embed_in_tags (1, BOOK_TAG, book) << endl;
  cout << xml_text_embed_in_tags (1, CHAPTER_TAG, chapter) << endl;
  cout << xml_text_embed_in_tags (1, VERSE_TAG, verse) << endl;
  ustring s (xml_text_embed_in_tags (1, TEXT_TAG, message));
  write (1, s.c_str(), strlen (s.c_str()));
  write (1, "\n", 1);
  cout << xml_tag (0, MESSAGE_TAG, true) << endl;
}


void output_xml_message (const ustring& book, int chapter, const ustring& verse, const ustring& message)
{
  output_xml_message (book, convert_to_string (chapter), verse, message);
}


void quick_swap(ustring& a, ustring& b)
{
  // Exchanges the contents of the two strings.
  a.swap (b);
}


void quick_swap(unsigned int& a, unsigned int& b)
{
  // Exchanges the two values.
  std::swap (a, b);
}


void quick_sort(vector<unsigned int>& one, vector<ustring>& two, unsigned int beg, unsigned int end)
/*
Sorts two containers at the same time.
The elements of "one" from index beg up to, but not including, index end are 
put in ascending order. Each swap done in "one" is done in "two" as well, at
the same indexes, so that "two" follows the reordering of "one".
Both containers need to have at least "end" elements.
*/
{
  // Fewer than two elements: nothing to sort.
  if (end <= beg + 1)
    return;
  // The first element is the pivot. Elements not above it are gathered 
  // right behind it, the bigger ones at the end of the range.
  const unsigned int pivot = one[beg];
  unsigned int low = beg + 1;
  unsigned int high = end;
  while (low < high) {
    if (one[low] > pivot) {
      --high;
      quick_swap(one[low], one[high]);
      quick_swap(two[low], two[high]);
    } else {
      ++low;
    }
  }
  // Put the pivot between the two groups.
  const unsigned int pivot_slot = low - 1;
  quick_swap(one[pivot_slot], one[beg]);
  quick_swap(two[pivot_slot], two[beg]);
  // Sort both groups.
  quick_sort(one, two, beg, pivot_slot);
  quick_sort(one, two, high, end);
}


void quick_sort(vector<ustring>& one, vector<unsigned int>& two, unsigned int beg, unsigned int end)
/*
Sorts two containers at the same time, like the other quick_sort, but here 
the strings are the sort keys.
The elements of "one" from index beg up to, but not including, index end are 
put in ascending order, as given by the comparison operator of the strings. 
Each swap done in "one" is done in "two" as well, at the same indexes.
Both containers need to have at least "end" elements.
*/
{
  // Fewer than two elements: nothing to sort.
  if (end <= beg + 1)
    return;
  // The first element is the pivot. Elements not above it are gathered 
  // right behind it, the other ones at the end of the range.
  const ustring pivot = one[beg];
  unsigned int low = beg + 1;
  unsigned int high = end;
  while (low < high) {
    if (one[low] <= pivot) {
      ++low;
      continue;
    }
    --high;
    quick_swap(one[low], one[high]);
    quick_swap(two[low], two[high]);
  }
  // Put the pivot between the two groups.
  const unsigned int pivot_slot = low - 1;
  quick_swap(one[pivot_slot], one[beg]);
  quick_swap(two[pivot_slot], two[beg]);
  // Sort both groups.
  quick_sort(one, two, beg, pivot_slot);
  quick_sort(one, two, high, end);
}


ReadDirectories::ReadDirectories (const ustring & path, const ustring & prefix,
                                  const ustring & suffix)
{
  // Reads the directories in directory "path" whose names start with 
  // "prefix" and end on "suffix", and stores the names in "directories".
  // It does not return regular files.
  // If "path" cannot be opened, "directories" is left as it is.
  try
  {
    GDir *dir = g_dir_open (path.c_str(), 0, NULL);
    // g_dir_open gives NULL on failure, e.g. if the path does not exist.
    // Do not pass that on to g_dir_read_name and g_dir_close.
    if (dir == NULL)
      return;
    const gchar *s;
    vector <ustring> entries;
    while ((s = g_dir_read_name (dir)) != NULL)
      entries.push_back (s);
    g_dir_close (dir);    
    for (unsigned int i = 0; i < entries.size (); i++) {
      if (g_str_has_suffix (entries[i].c_str(), suffix.c_str()))
        if (g_str_has_prefix (entries[i].c_str(), prefix.c_str()))
          if (g_file_test (gw_build_filename (path, entries[i]).c_str(),  G_FILE_TEST_IS_DIR))
            directories.push_back (entries[i]);
    }
  }
  catch (...)
  {
    // Best effort: on any exception keep what has been collected so far.
  }
}


ReadDirectories::~ReadDirectories ()
{
  // No explicit cleanup is done here.
}


ReadFiles::ReadFiles (const ustring & path, const ustring & prefix,
                      const ustring & suffix)
{
  // Reads the entries in directory "path" whose names start with "prefix" 
  // and end on "suffix", and stores the names in "files".
  // It does not return directories.
  // If "path" cannot be opened, "files" is left as it is.
  try
  {
    GDir *dir = g_dir_open (path.c_str(), 0, NULL);
    // g_dir_open gives NULL on failure, e.g. if the path does not exist.
    // Do not pass that on to g_dir_read_name and g_dir_close.
    if (dir == NULL)
      return;
    const gchar *s;
    vector <ustring> entries;
    while ((s = g_dir_read_name (dir)) != NULL)
      entries.push_back (s);
    g_dir_close (dir);    
    for (unsigned int i = 0; i < entries.size (); i++) {
      if (g_str_has_suffix (entries[i].c_str(), suffix.c_str()))
        if (g_str_has_prefix (entries[i].c_str(), prefix.c_str()))
          if (!g_file_test (gw_build_filename (path, entries[i]).c_str(), G_FILE_TEST_IS_DIR))
            files.push_back (entries[i]);
    }
  }
  catch (...)
  {
    // Best effort: on any exception keep what has been collected so far.
  }
}


ReadFiles::~ReadFiles ()
{
  // No explicit cleanup is done here.
}


ReadText::ReadText (const ustring & file, bool silent, bool trimming)
{
  // Reads the text and stores it line by line into "lines".
  // If the file cannot be opened, then "lines" is left as it is, and 
  // unless "silent" is true, an error is written to cerr and a 
  // runtime_error is thrown.
  // The lines will be trimmed if "trimming" is true.
  ifstream in (file.c_str ());
  if (!in) {
    if (!silent) {
      cerr << "Error opening file " << file << endl;
      // Throw a real exception. There used to be a bare "throw;" here, but
      // without an exception being handled that terminates the program 
      // instead of throwing something the caller can catch.
      throw runtime_error ("Error opening file " + file);
    }
    return;
  }
  string s;
  while (getline (in, s)) {
    if (trimming)
      s = trim (s);
    lines.push_back (s);
  }
}


ReadText::~ReadText ()
{
  // No explicit cleanup is done here.
}


WriteText::WriteText (const ustring & file, bool append)
{
  /*
     Opens the stream "out" on a textfile for writing.
     If 'append' is set, the output is added to the end of an existing file. 
     If not, the file is opened with the default mode of the stream.
     Throws a runtime_error if the stream is not good after opening.
   */
  if (!append)
    out.open (file.c_str ());
  else
    out.open (file.c_str (), ios_base::out | ios_base::app);
  if (out)
    return;
  throw runtime_error ("Error creating file " + file);
}


WriteText::~WriteText ()
{
  // No explicit cleanup is done here.
}


Parse::Parse (const ustring & line, bool remove_punctuation)
{
  /*
  Parses a line of text into its separate words, and stores them in "words".
  The line is trimmed, and then cut at every space. Two spaces next to each 
  other give an empty word, and so does an empty line.
  If "remove_punctuation" is set, each word is cut off at the last 
  . , ; or : it contains.
   */
  // The space added at the end closes the last word.
  ustring padded_line = trim (line);
  padded_line.append (" ");
  size_t word_start = 0;
  for (size_t space = padded_line.find (" "); space != string::npos;
       space = padded_line.find (" ", word_start)) {
    ustring word = padded_line.substr (word_start, space - word_start);
    if (remove_punctuation) {
      const string::size_type cut = word.find_last_of (".,;:");
      if (cut != string::npos)
        word.erase (cut);
    }
    words.push_back (word);
    // Carry on behind the space.
    word_start = space + 1;
  }
}


Parse::~Parse ()
{
  // No explicit cleanup is done here.
}


ParseLine::ParseLine (const ustring & text)
{
  /*
     Parses text into its separate lines, and stores them in "lines".
     The text is cut at each new line character. Each line is trimmed 
     before it is stored, and so is the remaining text after each cut.
   */
  ustring remaining = trim (text);
  for (size_t newline = remaining.find ("\n"); newline != string::npos;
       newline = remaining.find ("\n")) {
    lines.push_back (trim (remaining.substr (0, newline)));
    // Go on with the text behind the new line.
    remaining = trim (remaining.substr (newline + 1));
  }
  // What is left has no new line in it: it is the last line.
  if (!remaining.empty())
    lines.push_back (trim (remaining));
}


ParseLine::~ParseLine ()
{
  // No explicit cleanup is done here.
}

Generated by  Doxygen 1.6.0   Back to index