// Logo Search packages:
// Sourcecode: bibledit version File versions  Download package
//
// compareutils.cpp

/*
** Copyright (C) 2003-2006 Teus Benschop.
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/


#include "libraries.h"
#include "utilities.h"
#include <libgen.h>
#include <glib.h>
#include <config.h>
#include "constants.h"
#include "compareutils.h"
#include "usfmtools.h"
#include "usfm.h"
#include "printproject.h"
#include "printreferences.h"
#include "projectutils.h"
#include "generalconfig.h"


// The kind of markup that insert_markup_in_line puts around a text.
// In this file StrikeThrough is used for lines taken from the second text,
// and Bold for lines of the original text that have no match in the second.
enum Markup { StrikeThrough, Bold };


// Forward declarations of the helpers that are defined further down.

// Compares two chapters that were already split into usfms and text.
// With "optimize" set it only counts the changes in "number_of_changes";
// without it the marked-up lines are also appended to "outputchapter".
void compare_chapter_internal (
      const vector<ustring>& originalusfms, const vector<ustring>& originaltext, 
      const vector<ustring>& secondusfms, const vector<ustring>& secondtext, 
      vector<ustring>& outputchapter,
      bool optimize, 
      unsigned int look_forward_lines,
      int& number_of_changes);
// Returns the usfm followed by the text, the text being wrapped in the 
// begin and end markers of the requested markup.
ustring insert_markup_in_line (const ustring& usfm, const ustring& text, Markup markup);
// Compares two lines word by word, appends the marked-up result to 
// "outputline", and returns the number of changes.
int compare_word_by_word (const ustring& newline, const ustring& oldline, ustring& outputline);
// Does one word by word comparison with a given look-forward distance.
// With "optimize" set it only counts the changes.
void compare_word_by_word_internal (
      const ustring& newline, const ustring& oldline, ustring& outputline,
      bool optimize, int look_forward_words, unsigned int& number_of_changes);



void compare_with ()
/*
Compares the current project with the project that the configuration says
it should be compared with, and prints the differences.
The comparison is stored in a temporary copy of the current project.
That copy is deleted before returning, also when the user cancels, 
so that no half-finished "... compared with ..." project is left behind.
*/
{
  // Configuration
  GeneralConfiguration genconfig (0);
  // Progress system.
  ProgressWindow progresswindow ("Comparing", true);
  progresswindow.set_text ("Preparing projects");
  progresswindow.set_fraction (0.1);
  // Copy current project to a new, temporary, project.
  ustring newproject;
  newproject = genconfig.project() + " compared with " + genconfig.project_to_compare_with();
  project_copy (genconfig.project(), newproject);
  // Do the actual comparison, unless the user cancelled during the copy.
  if (!progresswindow.cancel) {
    compare_projects (genconfig.project(), genconfig.project_to_compare_with(), newproject, progresswindow);
  }
  // See what to print, unless the user cancelled during the comparison.
  if (!progresswindow.cancel) {
    progresswindow.set_text ("Printing differences");
    // The temporary project is made the current one while printing, and the
    // real one is put back right after that.
    // NOTE(review): presumably the print routines read the project from the
    // configuration - confirm.
    string project = genconfig.project();
    genconfig.project_set (newproject);
    if (genconfig.print_changes_only()) {
      // Print changes only.
      vector<ustring> references;
      compare_get_changes (newproject, references);
      view_references_pdf (references, progresswindow);
    } else {
      // Print whole modified project.
      view_project_pdf (progresswindow);
    }
    genconfig.project_set (project);
  }
  // Delete the modified project, whether or not the operation was cancelled.
  project_delete (newproject);
}


void compare_projects (const ustring& originalproject, const ustring& secondproject,
                       const ustring& outputproject, ProgressWindow& progresswindow)
/*
Compares originalproject with secondproject, 
and puts the differences in outputproject.
*/
{
  // Progress information.
  progresswindow.set_text ("Starting comparison");
  // Open the Scriptures.
  vector<ustring> originalscripture_books = project_get_books (originalproject);
  vector<ustring> secondscripture_books = project_get_books (secondproject);
  vector<ustring> outputscripture = project_get_books (outputproject);
  // Progress information.
  progresswindow.set_iterate (0.1, 0.5, originalscripture_books.size());
  // Go through the original scripture.
  for (unsigned int ib = 0; ib < originalscripture_books.size(); ib++) {
    // Progress.
    progresswindow.iterate ();
    // If this book does not exist in the second project,
    // continue to the next book, without comparing.
    if (!project_book_exists (secondproject, originalscripture_books[ib]))
      continue;
    if (progresswindow.cancel) {
      return;
    }
    // Go through each chapter of the original book.
    vector<unsigned int> originalchapters;
    originalchapters = project_get_chapters (originalproject, originalscripture_books[ib]);
    for (unsigned int ic = 1; ic < originalchapters.size(); ic++) {
      // Progress info.
      ustring info = originalscripture_books[ib] + " " + convert_to_string (originalchapters[ic]);
      progresswindow.set_text (info);
      if (progresswindow.cancel) {
        return;
      }
      // Get chapter of the original.
      vector <ustring> originallines;
      originallines = project_retrieve_chapter (originalproject, originalscripture_books[ib], originalchapters[ic]);
      // Get chapter of the second.
      vector <ustring> secondlines;
      secondlines = project_retrieve_chapter (secondproject, originalscripture_books[ib], originalchapters[ic]);
      // If both chapters are the same, continue to the next one.
      ustring originalbook_chapter_contents;
      for (unsigned int i = 0; i < originallines.size(); i++)
        originalbook_chapter_contents.append (originallines[i]);
      ustring secondbook_chapter_contents;
      for (unsigned int i = 0; i < secondlines.size(); i++)
        secondbook_chapter_contents.append(secondlines[i]);
      if (originalbook_chapter_contents == secondbook_chapter_contents)
        continue;
      // At this stage the chapters are different. Compare them more thoroughly.
      vector <ustring> outputlines;
      outputlines = project_retrieve_chapter (outputproject, originalscripture_books[ib], originalchapters[ic]);
      compare_chapter (originallines, secondlines, outputlines);
      CategorizeChapterVerse ccv (outputlines);
      project_store_chapter (outputproject, originalscripture_books[ib], ccv);
    }      
  }
}



void compare_chapter (vector<ustring>& originalchapter, vector<ustring>& secondchapter, vector<ustring>& outputchapter)
/*
Stores in outputchapter the result of comparing originalchapter with
secondchapter. Whatever outputchapter held before is discarded.
The caller is expected to pass chapters that differ.
*/
{
  // Every line is separated into its usfm and its text one time only, 
  // as the comparison below runs several times over the same lines.
  // E.g. "\v 1 In the beginning ..." is split into:
  // - \v 1 
  // - In the beginning ...
  vector<ustring> originalusfms;
  vector<ustring> originaltext;
  for (vector<ustring>::iterator line = originalchapter.begin(); line != originalchapter.end(); ++line) {
    ustring marker;
    ustring text;
    split_line_into_usfm_and_text (*line, marker, text);
    originalusfms.push_back (marker);
    originaltext.push_back (text);
  }
  vector<ustring> secondusfms;
  vector<ustring> secondtext;
  for (vector<ustring>::iterator line = secondchapter.begin(); line != secondchapter.end(); ++line) {
    ustring marker;
    ustring text;
    split_line_into_usfm_and_text (*line, marker, text);
    secondusfms.push_back (marker);
    secondtext.push_back (text);
  }
  // Trial runs: try looking forward 0 to 5 lines, counting the changes only.
  // The first distance that gives the fewest changes wins.
  int number_of_changes;
  int fewest_changes = INT_MAX;
  unsigned int best_look_forward = 0;
  for (unsigned int candidate = 0; candidate < 6; candidate++) {
    compare_chapter_internal (originalusfms, originaltext, secondusfms, secondtext,
                              outputchapter, true, candidate, number_of_changes);
    if (number_of_changes >= fewest_changes)
      continue;
    fewest_changes = number_of_changes;
    best_look_forward = candidate;
  }
  // The real run, with the distance found above, produces the output.
  outputchapter.clear ();
  compare_chapter_internal (originalusfms, originaltext, secondusfms, secondtext,
                            outputchapter, false, best_look_forward, number_of_changes);
}


void compare_chapter_internal (
      const vector<ustring>& originalusfms, const vector<ustring>& originaltext, 
      const vector<ustring>& secondusfms, const vector<ustring>& secondtext, 
      vector<ustring>& outputchapter,
      bool optimize, 
      unsigned int look_forward_lines,
      int& number_of_changes)
/*
This one is used by "compare_chapter", but it is used in the optimization 
process also, to find out which algorithm detects the lowest number of changes.
*/
{
  // Variabeles and initialization of them.
  number_of_changes = 0;
  // Save the second chapter to a temporal variable.
  vector<ustring> usfm2 (secondusfms.begin(), secondusfms.end());
  vector<ustring> text2 (secondtext.begin(), secondtext.end());
  // Go through all the original lines.
  for (unsigned int c1c = 0; c1c < originalusfms.size(); c1c++) {
    // Still text left in second chapter?
    if (usfm2.size() > 0) {
      if (originalusfms[c1c].find(usfm2[0]) != string::npos) {
        // Usfm found at right position: check this verse word by word.
        if (!optimize) {
          ustring outputline;
          number_of_changes += compare_word_by_word (originaltext[c1c], text2[0], outputline);
          outputchapter.push_back (originalusfms[c1c] + " " + outputline);
        }
        // Remove this line as we're through with it.
        usfm2.erase(usfm2.begin());
        text2.erase(text2.begin());
      } else {
        bool sfm_found = false;
        int i = -1;
        // Check max n lines ahead. This variable is optimized for best results.
        for (unsigned int fc = 0; fc <= look_forward_lines; fc++) {
          if ((!sfm_found) && (fc < usfm2.size())) {
            if (originalusfms[c1c].find(usfm2[fc]) != string::npos) {
              sfm_found = true;
              i = fc;
            }
          }
        }
        if (i >= 0) {
          // Old text has extra lines, add them with strikeout.
          for (int fc = 0; fc < i; fc++) {
            number_of_changes++;
            if (!optimize) {
              outputchapter.push_back (insert_markup_in_line (usfm2[0], text2[0], StrikeThrough));
            }
            usfm2.erase(usfm2.begin());
            text2.erase(text2.begin());
          }
          // After adding extra lines, add the current in normal text.
          if (!optimize) {
            ustring outputline;
            number_of_changes += compare_word_by_word (originaltext[c1c], text2[0], outputline);
            outputchapter.push_back (originalusfms[c1c] + " " + outputline);
          }
          usfm2.erase(usfm2.begin());
          text2.erase(text2.begin());
        } else if (i < 0) {
          // Usfm not found, add new text with bold.
          number_of_changes++;
          if (!optimize) {
            outputchapter.push_back (insert_markup_in_line (originalusfms[c1c], originaltext[c1c], Bold));
          }
        }
      }
    } else {
      // Old text is finished, add remaining line with bold.
      number_of_changes++;
      if (!optimize) {
        outputchapter.push_back (insert_markup_in_line (originalusfms[c1c], originaltext[c1c], Bold));
      }
    }
  }    
  // After comparison, what is left in the old text, add it with strikeout.
  for (unsigned int i = 0; i < usfm2.size(); i++) {
    number_of_changes++;
    if (!optimize) {
      outputchapter.push_back (insert_markup_in_line (usfm2[i], text2[i], StrikeThrough));
    }
  }
}
  

ustring insert_markup_in_line (const ustring& usfm, const ustring& text, Markup markup)
/*
Returns a line made of the usfm and the text, with the text marked as
strike-through or bold.
The markup cannot be put around the whole line, like so:
MARKUP_BEGIN\v 1 In the beginning ... earth.MARKUP_END
because that would disturb the processing of the USFMs.
It is put around the text only, so:
\v 1 MARKUP_BEGINIn the beginning ... earth.MARKUP_END
If there is no text, the usfm is returned without any markup.
The begin and end markers come from the STRIKE_THROUGH_* and BOLD_* constants.
According to the earlier notes on this function these are placeholders,
which the printing engine converts to the real tags.
*/
{
  // Nothing to mark up: the usfm stands on its own.
  if (text.empty())
    return usfm;
  string opening;
  string closing;
  switch (markup) {
    case StrikeThrough:
      opening = STRIKE_THROUGH_BEGIN;
      closing = STRIKE_THROUGH_END;
      break;
    case Bold:
      opening = BOLD_BEGIN;
      closing = BOLD_END;
      break;
  }
  return usfm + " " + opening + text + closing;
}


int compare_word_by_word (const ustring& newline, const ustring& oldline, ustring& outputline)
/*
Compares newline with oldline word by word.
The marked-up result is appended to outputline.
Returns the number of changes found.
*/
{
  // Trial runs: try looking forward 0, 3, 6, ... 30 words, counting the 
  // changes only. The first distance that gives the fewest changes wins.
  unsigned int changes = 0;
  unsigned int fewest_changes = INT_MAX;
  unsigned int best_look_forward = 0;
  for (unsigned int distance = 0; distance <= 30; distance += 3) {
    compare_word_by_word_internal (newline, oldline, outputline, true, distance, changes);
    if (changes < fewest_changes) {
      fewest_changes = changes;
      best_look_forward = distance;
    }
  }
  // The real run, with the distance found above, produces the output.
  changes = 0;
  compare_word_by_word_internal (newline, oldline, outputline, false, best_look_forward, changes);
  return changes;
}


void compare_word_by_word_internal (
      const ustring& newline, const ustring& oldline, ustring& outputline,
      bool optimize, int look_forward_words, unsigned int& number_of_changes)
{
  // Initialize.
  number_of_changes = 0;
  // Fill "words1" with the words from the new line.
  // Spaces disturb the comparison process: leave them out.
  vector <ustring> words1;
  Parse parse1 (newline, false);
  words1.assign (parse1.words.begin(), parse1.words.end());
  // Fill "words2" with the words from the old line.
  // Spaces disturb the comparison process: leave them out.
  vector <ustring> words2;
  Parse parse2 (oldline, false);
  words2.assign (parse2.words.begin(), parse2.words.end());
  // Go through all the words, and compare them.
  for (unsigned int w1c = 0; w1c < words1.size(); w1c++) {
    ustring newword (words1[w1c]);
    // Spaces are not dealt with in the comparison, because they only mix it up.
    // But they are put back here in the output line.
    if (!outputline.empty())
      if (!optimize)
        outputline.append (" ");
    // See whether a word can be found that is the same, in the second text.
    if (words2.size() > 0) {
      // Look for that word. Look forward only "look_forward_words" positions,
      // or less, depending on how many words are still left in the line.
      int index = -1;
      unsigned int highlimit = look_forward_words;
      highlimit = CLAMP (highlimit, 0, words2.size());
      for (unsigned int i = 0; i < highlimit; i++) {
        if (newword == words2[i]) {
          index = i;
          break;
        }
      }
      if (index == 0) {
        // Word found at right position: add it to the output.
        if (!optimize)
          outputline.append (newword);
        words2.erase (words2.begin());
      } else if (index > 0) { 
        // Equal word is near.
        // Old sentence has extra words, add them with strikeout.
        for (int fc = 0; fc < index; fc++) {
          number_of_changes++;
          if (!optimize)
            outputline.append (STRIKE_THROUGH_BEGIN + words2[0] + STRIKE_THROUGH_END + " ");
          words2.erase (words2.begin());
        }
        // After adding extra lines, add the current in normal text.
        if (!optimize)
          outputline.append (newword);
        words2.erase (words2.begin());
      } else if (index < 0) {
        // Word not found near current position, add new words with bold.
        number_of_changes++;
        if (!optimize)
          outputline.append (BOLD_BEGIN + newword + BOLD_END);
      }
    } else {
      // Old text is finished, add remaining new words with bold.
      number_of_changes++;
      if (!optimize)
        outputline.append (BOLD_BEGIN + newword + BOLD_END);
    }
  }
  // After comparison, what is left in the old text, add it with strikeout.
  for (unsigned int i = 0; i < words2.size(); i++) {
    number_of_changes++;
    if (!optimize) {
      ustring s;
      s.append (" ");
      s.append (STRIKE_THROUGH_BEGIN);
      s.append (words2[i]);
      s.append (STRIKE_THROUGH_END);
      outputline.append (s);
    }
  }
}


void compare_get_changes (const ustring& project, vector <ustring>& changed_references)
// This produces all changed references in a project.
{
  // Go through the whole scripture.
  vector <ustring> scripture_books = project_get_books (project);
  for (unsigned int i = 0; i < scripture_books.size (); i++) {
    vector <unsigned int> chapters = project_get_chapters (project, scripture_books[i]);
    for (unsigned int chapter = 1; chapter < chapters.size(); chapter++) {
      vector <ustring> lines;
      lines = project_retrieve_chapter (project, scripture_books[i], chapter);
      ustring book_chapter = scripture_books[i] + " " + convert_to_string (chapter) + ":";
      ustring currentverse = "1";
      ustring last_reference_stored;
      for (unsigned int i2 = 0; i2 < lines.size (); i2++) {
        ustring line (lines[i2]);
        // Extract the verse number.
        ustring marker;
        marker = usfm_extract_marker (line);
        if (usfm_is_verse (marker)) {
          currentverse = number_in_string (line);
        }
        // See if this line contains a change.
        bool add_this_line = false;
        if (line.find (STRIKE_THROUGH_BEGIN) != string::npos) {
          add_this_line = true;
        } else if (line.find (STRIKE_THROUGH_END) != string::npos) {
          add_this_line = true;
        } else if (line.find (BOLD_BEGIN) != string::npos) {
          add_this_line = true;
        } else if (line.find (BOLD_END) != string::npos) {
          add_this_line = true;
        }
        if (add_this_line) {
          string ref = book_chapter + currentverse;
          if (ref != last_reference_stored) {
            changed_references.push_back(ref);
            last_reference_stored = ref;
          }
        }          
      }      
    }
  }
}

// Generated by  Doxygen 1.6.0   Back to index