// Logo Search packages:
// Sourcecode: bibledit version File versions  Download package
//
// compare-usfm.cpp

/*
    Copyright (C) 2003-2006 Teus Benschop.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/


#include "libraries.h"
#include "utilities.h"
#include "constants.h"


// Storage for some parameters. They are filled from the command line in
// main() and read by store_marker().
// Markers to check for (--include-only). When empty, any marker is checked.
set<ustring> include_only;
// Markers to leave out of the comparison (--ignore).
set<ustring> ignore;
// When true (--ignore-verse-zero), markers located in verse "0" are not stored.
bool ignore_verse_zero = false;


void store_marker (unsigned int chapter, vector<unsigned int>& chapters,
                   ustring& verse, vector<ustring>& verses,
                   ustring& marker, vector<ustring>& markers,
                   vector<bool>& checked)
// We've a new marker. Store it at the right position in the containers.
{
  // If we ignore verse zero, deal with that.
  if (ignore_verse_zero) {
    if (verse == "0")
      return;
  }
  // Check whether to include this marker.
  bool include;
  if (include_only.empty()) {
    include = true;
  }
  else {
    include = include_only.find (marker) != include_only.end();
  }
  if (!ignore.empty()) {
    if (ignore.find (marker) != ignore.end())
      include = false;
  }
  // Containers empty? Make room, store data, and quit.
  if (chapters.empty()) {
    chapters.push_back (chapter);
    verses.push_back (verse);
    if (include)
      markers.push_back (marker);
    else
      markers.push_back ("");
    checked.push_back (false);
    return;
  }
  // Get pointer to last element in container.
  int pointer;
  pointer = chapters.size() - 1;
  // If current verse/chapter have been stored already, add the marker to the 
  // existing ones, and quit.
  if ((verse == verses[pointer]) && chapter == chapters[pointer]) {
    if (include) {
      if (!markers[pointer].empty())
        markers[pointer].append (" ");
      markers[pointer].append (marker);
    }
    return;
  }
  // New verse or chapter: make room in containers, store data and quit.
  chapters.push_back (chapter);
  verses.push_back (verse);
  if (include)
    markers.push_back (marker);
  else
    markers.push_back ("");
  checked.push_back (false);
}


int main (int argc, char *argv[])
{
  // Storage for parameters on commandline.
  ustring file_under_review;
  ustring file_to_refer_to;
  // Process command line arguments.
  for (int i = 1; i < argc; i++) {
    ustring argument;
    argument = argv[i];
    if (argument.length() > 2) {
      if (argument.substr (0, 2) == "--") {
        argument.erase (0, 2);
        if (argument == "include-only") {
          ustring argument = argv[++i];
          Parse parse (argument);
          for (unsigned int i = 0; i < parse.words.size(); i++)
            include_only.insert (parse.words[i]);
        }
        else if (argument == "ignore") {
          ustring argument = argv[++i];
          Parse parse (argument);
          for (unsigned int i = 0; i < parse.words.size(); i++)
            ignore.insert (parse.words[i]);
        }
        else if (argument == "ignore-verse-zero") {
          ignore_verse_zero = true;
        }
        continue;
      }
    }
    if (file_under_review.empty())
      file_under_review = argument;
    else
      file_to_refer_to = argument;
  }
  
  if (argc == 1) {
    cout << "sc-compare-usfm reads two USFM files given as arguments on the commandline." << endl;
    cout << "The first file is the one under review." << endl;
    cout << "The second file is the standard to refer to." << endl;
    cout << "Where the file under review differs from the standard, regarding the USFMs" << endl;
    cout << "it contains, messages will be given." << endl;
    cout << "Parameters on the commandline, beside the two files:" << endl;
    cout << "--include-only <list of markers to check for>" << endl;
    cout << "  This list of markers does not contain the backslashes." << endl;
    cout << "  The markers are separated by a space." << endl;
    cout << "  If this parameter is omitted, it checks for any marker." << endl;
    cout << "--ignore <list of markers to ignore>" << endl;
    cout << "--ignore-verse-zero" << endl;
    cout << "  Any markers before the first verse are ignored." << endl;
    return 0;
    /*
    Note that although options for ignoring footnotes and crossreferences could
    be implemented, there is really no need for it, because the markers for 
    footnotes and crossreferences can be given in the --ignore parameter.
    */
  }
    
  if (file_under_review.empty() || file_to_refer_to.empty()) {
    cerr << "Two filenames needed" << endl;
    return 1;
  }
  if (file_under_review == file_to_refer_to) {
    cerr << "Comparing file with itself" << endl;
    return 1;
  }
    
  // Variables for our use.
  ustring bookname = "Unknown";
  unsigned int chapter = 0;
  ustring verse = "0";
  vector<unsigned int> chapters1;
  vector<ustring> verses1;
  vector<ustring> markers1;
  vector<bool> checked1;
  vector<unsigned int> chapters2;
  vector<ustring> verses2;
  vector<ustring> markers2;
  vector<bool> checked2;

  // Read the file under review and go through all lines it has.
  ReadText rt (file_under_review, false, false);
  for (unsigned int i = 0; i < rt.lines.size(); i++) {
    // Go through all markers in this line.
    ustring marker = usfm_extract_marker_within_line (rt.lines[i]);
    while (!marker.empty()) {
      // Obtain book, chapter and verse.
      if (marker == "id") {
        ustring id = rt.lines[i].substr (0, 3);
        bookname = id_to_biblebook_english (id);
      } else if (marker == "c") {
        chapter = convert_to_int (number_in_string (rt.lines[i]));
        verse = "0";
      } else if (marker == "v") {
        verse = rt.lines[i];
        size_t spaceposition = verse.find (" ");
        if (spaceposition != string::npos) {
          verse = verse.substr (0, spaceposition);
        }
      }
      // Store current marker.
      store_marker (chapter, chapters1, verse, verses1, marker, markers1, checked1);
      // Get next markers.
      marker = usfm_extract_marker_within_line (rt.lines[i]);
    }
  }

  // Reset some variables.
  chapter = 0;
  verse = "0";
  // Read the file to refer to and go through all lines it has.
  ReadText rt2 (file_to_refer_to, false, false);
  for (unsigned int i = 0; i < rt2.lines.size(); i++) {
    // Go through all markers in this line.
    ustring marker = usfm_extract_marker_within_line (rt2.lines[i]);
    while (!marker.empty()) {
      // Obtain book, chapter and verse.
      if (marker == "id") {
        ustring id = rt2.lines[i].substr (0, 3);
        if (id_to_biblebook_english (id) != bookname) {
          cerr << "Cannot compare different books" << endl;
          return 1;
        }
      } else if (marker == "c") {
        chapter = convert_to_int (number_in_string (rt2.lines[i]));
        verse = "0";
      } else if (marker == "v") {
        verse = rt2.lines[i];
        size_t spaceposition = verse.find (" ");
        if (spaceposition != string::npos) {
          verse = verse.substr (0, spaceposition);
        }
      }
      // Store current marker.
      store_marker (chapter, chapters2, verse, verses2, marker, markers2, checked2);
      // Get next markers.
      marker = usfm_extract_marker_within_line (rt2.lines[i]);
    }
  }

  // Do the actual comparison.
  for (unsigned int i = 0; i < chapters1.size(); i++) {
    // Look for same chapters and verses in the reference data.
    int pointer = -1;
    for (unsigned int i2 = 0; i2 < chapters2.size(); i2++) {
      if (chapters1[i] == chapters2[i2]) {
        if (verses1[i] == verses2[i2]) {
          pointer = i2;
          break;
        }
      }
    }
    if (pointer >= 0) {
      // Do comparison of markers.
      if (markers1[i] != markers2[pointer]) {
        ustring message;
        message = "Markers differ: ";
        message.append (markers1[i]);
        message.append (" (");
        message.append (markers2[pointer]);
        message.append (")");
        output_xml_message (bookname, chapters1[i], verses1[i], message);
      }
      // Mark these verses in the standard as checked.
      checked2[pointer] = true;
    }
    else {
      // This chapter with this verse was not found in the reference data.
      output_xml_message (bookname, chapters1[i], verses1[i], "Extra verse");
    }
    // Mark this book as checked.
    checked1[i] = true;
  }
  // See whether the standard, the file to refer to, does have some unchecked data.
  for (unsigned int i = 0; i < chapters2.size(); i++) {
    if (!checked2[i]) {
      output_xml_message (bookname, chapters2[i], verses2[i], "Missing verse");
    }
  }
  
  return 0;
}

// Generated by  Doxygen 1.6.0   Back to index