// Logo Search packages:
// Sourcecode: bibledit version File versions  Download package
//
// scripturechecks.cpp

/*
** Copyright (C) 2003-2006 Teus Benschop.
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/


#include "libraries.h"
#include "utilities.h"
#include <libgen.h>
#include <glib.h>
#include <config.h>
#include "gwrappers.h"
#include "gtkwrappers.h"
#include "scripturechecks.h"
#include "directories.h"
#include "references.h"
#include "scripturechecksloadresults.h"
#include "scripturechecksdisplayresultsbrowser.h"
#include "dialogcheck.h"
#include "style.h"
#include "stylesheetutils.h"
#include "shell.h"
#include "projectutils.h"
#include "generalconfig.h"
#include "projectconfig.h"
#include "progresswindow.h"
#include "checks.h"
#include "check_validate_usfm.h"
#include "session.h"
#include "check_count_words.h"


vector<ustring> scripture_checks_internal_generate_filenames_separate ()
// Runs "bibledit --export-project-usfm" to export the current project into
// the "checks" subdirectory of the temporal directory, then returns one
// filename (passed through shell_quote_space) for every project book that is
// also part of the session's book selection.
{
  // Directory that receives the exported books.
  ustring export_directory = gw_build_filename (directories_get_temp(), "checks");
  create_directory (export_directory);
  // Assemble the export command and run it.
  GeneralConfiguration genconfig (0);
  ustring export_command ("bibledit --export-project-usfm ");
  export_command += "--project" + shell_quote_space (genconfig.project());
  export_command += "--directory" + shell_quote_space (export_directory);
  export_command += "--gui";
  system (export_command.c_str());
  // Keep only the books the user selected, in project order.
  vector<ustring> project_books = project_get_books (genconfig.project());
  Session session (0);
  set<ustring> wanted_books = session.selected_books ();
  vector<ustring> result;
  for (vector<ustring>::const_iterator book = project_books.begin(); book != project_books.end(); ++book) {
    if (wanted_books.count (*book) == 0)
      continue;
    ustring usfm_file = gw_build_filename (export_directory, *book + ".usfm");
    result.push_back (shell_quote_space (usfm_file));
  }
  return result;
}


ustring scripture_checks_internal_generate_filenames_combined ()
// Returns all filenames produced by
// scripture_checks_internal_generate_filenames_separate (), concatenated in
// order into one string without any extra separator.
{
  vector <ustring> separate_names = scripture_checks_internal_generate_filenames_separate ();
  ustring combined;
  for (vector<ustring>::const_iterator name = separate_names.begin(); name != separate_names.end(); ++name)
    combined += *name;
  return combined;
}


vector<ustring> checks_generate_booknames ()
// Returns the books of the current project that are also in the session's
// set of selected books, in the order project_get_books () lists them.
{
  vector<ustring> result;
  GeneralConfiguration genconfig (0);
  Session session (0);
  vector<ustring> project_books = project_get_books (genconfig.project());
  set<ustring> wanted_books = session.selected_books ();
  for (vector<ustring>::const_iterator book = project_books.begin(); book != project_books.end(); ++book) {
    if (wanted_books.count (*book) != 0)
      result.push_back (*book);
  }
  return result;
}


ustring scripture_checks_internal_temporal_outputfile ()
// Builds the filename "check.out" in the temporal directory; the checks
// redirect their standard output (the messages) to this file.
{
  ustring outputfile = gw_build_filename (directories_get_temp(), "check.out");
  return outputfile;
}


ustring scripture_checks_internal_temporal_errorfile ()
// Builds the filename "check.err" in the temporal directory; the checks
// redirect their standard error to this file.
{
  ustring errorfile = gw_build_filename (directories_get_temp(), "check.err");
  return errorfile;
}


void scripture_checks_internal_unlink_files ()
{
  unlink (scripture_checks_internal_temporal_outputfile ().c_str());
  unlink (scripture_checks_internal_temporal_errorfile ().c_str());
}


void scripture_checks_internal_display_results (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Displays the results of the checks to the user.
// First shows an error dialog if the error file has any content, then loads
// the output file and fills the given list store / tree view with the
// references and comments found in it.
{
  // See if we have any error messages.
  // The error file does not exist when no command was run (e.g. no books
  // selected, or the user cancelled straight away). In that case
  // g_file_get_contents () fails and leaves no usable buffer, so the pointer
  // is only used after a successful read.
  gchar *contents = NULL;
  ustring ucontents;
  if (g_file_get_contents (scripture_checks_internal_temporal_errorfile().c_str(), &contents, NULL, NULL)) {
    if (contents)
      ucontents = contents;
    g_free (contents);
  }
  if (!ucontents.empty()) {
    ucontents.insert (0, "There were some errors while checking:\n\n");
    gtkw_dialog_error (NULL, ucontents);
  }
  // Load the messages and hand them to the references display.
  ScriptureChecksLoadResults sclr (scripture_checks_internal_temporal_outputfile ());
  References references (liststore, treeview, treecolumn);
  references.set_references (sclr.references, sclr.comments);
  references.fill_store ();
}


void scripture_checks_internal_display_results (CheckDialogType displaywhat)
// Displays the results of the checks to the user.
// First shows an error dialog if the error file has any content, then hands
// the output file to a ScriptureChecksDisplayResultsBrowser for the given
// type of check.
{
  // See if we have any error messages.
  // The error file does not exist when no command was run. In that case
  // g_file_get_contents () fails and leaves no usable buffer, so the pointer
  // is only used after a successful read.
  gchar *contents = NULL;
  ustring ucontents;
  if (g_file_get_contents (scripture_checks_internal_temporal_errorfile().c_str(), &contents, NULL, NULL)) {
    if (contents)
      ucontents = contents;
    g_free (contents);
  }
  if (!ucontents.empty()) {
    ucontents.insert (0, "There were some errors while checking:\n\n");
    gtkw_dialog_error (NULL, ucontents);
  }
  ScriptureChecksDisplayResultsBrowser scdrb (displaywhat, scripture_checks_internal_temporal_outputfile ());
}


void scripture_checks_chapters_verses (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the chapters/verses dialog, then pipes
// every selected book through sc-input-usfm and sc-chapters-verses, and
// finally shows the collected messages in the given list store / tree view.
{
  // The dialog lives in its own scope so it is gone before the progress
  // window appears.
  int dialogresponse;
  {
    CheckDialog dialog (cdtChaptersVerses);
    dialogresponse = dialog.run();
  }
  if (dialogresponse != GTK_RESPONSE_OK)
    return;
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  vector<ustring> filenames = scripture_checks_internal_generate_filenames_separate ();
  progresswindow.set_iterate (0, 1, filenames.size());
  ProjectConfiguration projectconfig ("");
  for (vector<ustring>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) {
    progresswindow.iterate ();
    if (progresswindow.cancel)
      break;
    // The filename has been quoted already when it was generated.
    ustring command ("sc-input-usfm ");
    command += *filename;
    command += " | sc-chapters-verses --versification " + projectconfig.versification();
    command += " >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile());
    command += "2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile());
    system (command.c_str());
  }
  scripture_checks_internal_display_results (liststore, treeview, treecolumn);
}


void scripture_checks_count_usfms ()
// Asks the user to confirm through the markers-count dialog, runs
// sc-count-usfm once over all selected books, and shows the result in the
// results browser.
{
  {
    CheckDialog dialog (cdtMarkersCount);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  ustring filenames = scripture_checks_internal_generate_filenames_combined ();
  progresswindow.set_fraction (0.5);
  Session session (0);
  ustring command ("sc-count-usfm ");
  // Only the sort types cstSort1 and cstSort2 add an option.
  if (session.checksorttype() == cstSort1)
    command += "--sort-on-usfm";
  if (session.checksorttype() == cstSort2)
    command += "--sort-on-count";
  // The filenames have been quoted already when they were generated.
  command += " " + filenames;
  command += " >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile());
  command += "2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile());
  system (command.c_str());
  progresswindow.set_fraction (1);
  scripture_checks_internal_display_results (cdtMarkersCount);
}


void scripture_checks_validate_usfms (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the markers-validate dialog, runs the
// CheckValidateUsfm check over the selected books of the current project,
// and displays its references and comments in the given list store / tree view.
{
  {
    CheckDialog dialog (cdtMarkersValidate);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  GeneralConfiguration genconfig (0);
  CheckValidateUsfm check (genconfig.project(), checks_generate_booknames (), true, true);
  checks_display_references_comments (check.references, check.comments, liststore, treeview, treecolumn);
}


void scripture_checks_compare_usfms (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Compares the markers of the selected books of the current project with the
// same books in the project configured for comparison.
// For each selected book that also exists in the second project,
// sc-compare-usfm is run; a selected book that is absent there gets an
// "Extra book" message. When all books of the current project were selected,
// every book of the second project that was not compared gets a
// "Missing book" message. The collected messages are displayed in the given
// list store / tree view.
{
  // Configuration.
  GeneralConfiguration genconfig (0);
  Session session (0);
  // Open dialog to present information to user and to get the options from him.
  int dialogresponse;
  {
    CheckDialog dialog (cdtMarkersCompare);
    dialogresponse = dialog.run();
  }
  if (dialogresponse != GTK_RESPONSE_OK)
    return;
  // Get the books of the two projects.
  vector<ustring> scripture_books = project_get_books (genconfig.project());
  vector<ustring> scripture2_books = project_get_books (genconfig.check_markers_compare_project());
  // Container with flags to verify all books in scripture2 have been dealt with.
  vector<bool> scripture2_checked (scripture2_books.size(), false);
  // Display progress.
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  progresswindow.set_iterate (0, 1, scripture_books.size());
  // Go through each book we have in this scripture.
  set <ustring> selected_books = session.selected_books ();
  for (unsigned int i = 0; i < scripture_books.size(); i++) {
    progresswindow.iterate ();
    if (progresswindow.cancel)
      break;
    // See whether this book is included in the ones to check.
    if (selected_books.find (scripture_books[i]) != selected_books.end()) {
      // Get pointer to same book in second scripture.
      int pointer2 = -1;
      for (unsigned int i2 = 0; i2 < scripture2_books.size(); i2++) {
        if (scripture_books[i] == scripture2_books[i2]) {
          pointer2 = i2;
          break;
        }
      }
      // Command to execute.
      ustring command;
      // Does the book exist in the second project?
      // The index found above is required as well: without it a book that
      // project_book_exists () reports but that is missing from the book
      // list would index both vectors below with -1.
      if ((pointer2 >= 0) && project_book_exists (genconfig.check_markers_compare_project(), scripture_books[i])) {
        // Ok, same book found in second scripture.
        // NOTE(review): the names are passed through shell_quote_space ()
        // twice (here and when building the command) - confirm that this is
        // what sc-compare-usfm expects.
        ustring file1 = shell_quote_space (scripture_books[i]);
        ustring file2 = shell_quote_space (scripture2_books[pointer2]);
        command = "sc-compare-usfm" + shell_quote_space (file1) + shell_quote_space (file2);
        if ((!genconfig.check_markers_compare_all_markers()) && (!genconfig.check_markers_compare_include_only().empty())) {
          command.append (" --include-only '" + genconfig.check_markers_compare_include_only() + "'");
        }
        if (!genconfig.check_markers_compare_ignore().empty()) {
          command.append (" --ignore '" + genconfig.check_markers_compare_ignore() + "'");
        }
        if (genconfig.check_markers_compare_ignore_verse_zero())
          command.append (" --ignore-verse-zero");
        // Mark this book as done.
        scripture2_checked [pointer2] = true;
      }
      else {
        // Same book not found: give message about that.
        command = "sc-message '" + scripture_books[i]
          + "' 0 0 'Extra book'";
      }
      command.append (" >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile()));
      command.append ("2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile()));
      system (command.c_str());
    }
  }
  // See whether there are still some unchecked books in the second scripture.
  // Give message if there are unchecked ones, but only in case ALL books
  // were selected for checking.
  if (scripture_books.size() == session.selected_books().size()) {
    for (unsigned int i = 0; i < scripture2_checked.size(); i++) {
      if (!scripture2_checked[i]) {
        ustring command;
        command = "sc-message '" + scripture2_books[i]
          + "' 0 0 'Missing book'";
        command.append (" >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile()));
        command.append ("2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile()));
        system (command.c_str());
      }
    }
  }
  // Display results and finish.
  scripture_checks_internal_display_results (liststore, treeview, treecolumn);
}


void scripture_checks_count_characters ()
{
  int dialogresponse;
  {
    CheckDialog dialog (cdtCharactersCount);
    dialogresponse = dialog.run();
  }
  if (dialogresponse != GTK_RESPONSE_OK)
    return;
  scripture_checks_internal_unlink_files ();
  ustring filenames;
  filenames = scripture_checks_internal_generate_filenames_combined ();
  ProgressWindow progresswindow ("", false);
  progresswindow.set_fraction (0.5);
  Session session (0);
  ustring command;
  command = "sc-input-usfm --categorize ";
  command.append (filenames); // Do not quote here - done before.
  command.append (" | sc-count-characters ");
  if (session.checksorttype() == cstSort0)
    command.append ("--no-sort");
  if (session.checksorttype() == cstSort1)
    command.append ("--sort-on-character");
  if (session.checksorttype() == cstSort2)
    command.append ("--sort-on-count");
  command.append (" --invisible-characters");
  command.append (" >> " + scripture_checks_internal_temporal_outputfile());
  command.append (" 2>> " + scripture_checks_internal_temporal_errorfile());
  run_shell_progress (command, "sc-count-characters", true, "Counting", false, 0);
  scripture_checks_internal_display_results (cdtCharactersCount);
}


void scripture_checks_unwanted_patterns (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the unwanted-patterns dialog, pipes every
// selected book through sc-input-usfm and sc-unwanted-patterns (which gets
// the project's unwanted patterns file), and shows the collected messages in
// the given list store / tree view.
{
  GeneralConfiguration genconfig (0);
  {
    CheckDialog dialog (cdtUnwantedPatterns);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  vector<ustring> filenames = scripture_checks_internal_generate_filenames_separate ();
  progresswindow.set_iterate (0, 1, filenames.size());
  for (vector<ustring>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) {
    progresswindow.iterate ();
    if (progresswindow.cancel)
      break;
    // The filename has been quoted already when it was generated.
    ustring command ("sc-input-usfm --categorize ");
    command += *filename;
    command += " | sc-unwanted-patterns '" + checks_unwanted_patterns_get_filename (genconfig.project()) + "'";
    command += " >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile());
    command += "2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile());
    system (command.c_str());
  }
  scripture_checks_internal_display_results (liststore, treeview, treecolumn);
}


void scripture_checks_capitalization (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the capitalization dialog, pipes every
// selected book through sc-input-usfm and sc-capitalization with the options
// taken from the general configuration, and shows the collected messages in
// the given list store / tree view.
{
  GeneralConfiguration genconfig (0);
  {
    CheckDialog dialog (cdtWordsCapitalization);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  vector<ustring> filenames = scripture_checks_internal_generate_filenames_separate ();
  progresswindow.set_iterate (0, 1, filenames.size());
  for (vector<ustring>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) {
    progresswindow.iterate ();
    if (progresswindow.cancel)
      break;
    // The filename has been quoted already when it was generated.
    ustring command ("sc-input-usfm --categorize ");
    command += *filename;
    command += " | sc-capitalization";
    // Optional settings are only passed when they are non-empty.
    if (!genconfig.check_capitalization_punctuation().empty())
      command += " --punctuation-followed-by-capitals '" + genconfig.check_capitalization_punctuation() + "'";
    if (!genconfig.check_capitalization_ignore().empty())
      command += " --ignore-lowercase-following '" + genconfig.check_capitalization_ignore() + "'";
    command += " --abbreviations '" + checks_abbreviations_get_filename (genconfig.project()) + "'";
    // Either any prefix is allowed, or the prefixes come from the project's file.
    if (genconfig.check_capitalization_allow_any_prefixes()) {
      command += " --any-prefixes";
    } else {
      command += " --uncapitalized-prefixes '" + checks_uncapitalized_prefixes_get_filename (genconfig.project()) + "'";
    }
    command += " --capitalized-suffixes '" + checks_capitalized_suffixes_get_filename (genconfig.project()) + "'";
    command += " >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile());
    command += "2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile());
    system (command.c_str());
  }
  scripture_checks_internal_display_results (liststore, treeview, treecolumn);
}


void scripture_checks_repetition (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the repetition dialog, pipes every
// selected book through sc-input-usfm and sc-repetition with the options
// taken from the general configuration, and shows the collected messages in
// the given list store / tree view.
{
  GeneralConfiguration genconfig (0);
  {
    CheckDialog dialog (cdtWordsRepetition);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  vector<ustring> filenames = scripture_checks_internal_generate_filenames_separate ();
  progresswindow.set_iterate (0, 1, filenames.size());
  for (vector<ustring>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) {
    progresswindow.iterate ();
    if (progresswindow.cancel)
      break;
    // The filename has been quoted already when it was generated.
    ustring command ("sc-input-usfm --categorize ");
    command += *filename;
    command += " | sc-repetition";
    if (genconfig.check_repetition_ignore_case())
      command += " --ignore-case";
    if (genconfig.check_repetition_show_only_these())
      command += " --show-only '" + checks_repetition_show_only_get_filename (genconfig.project()) + "'";
    if (genconfig.check_repetition_ignore_these())
      command += " --ignore '" + checks_repetition_ignore_get_filename (genconfig.project()) + "'";
    command += " >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile());
    command += "2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile());
    system (command.c_str());
  }
  scripture_checks_internal_display_results (liststore, treeview, treecolumn);
}


void scripture_checks_matching_pairs (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the matching-pairs dialog, pipes every
// selected book through sc-input-usfm and sc-matching-pairs (with the
// configured --ignore value, if any), and shows the collected messages in
// the given list store / tree view.
{
  GeneralConfiguration genconfig (0);
  {
    CheckDialog dialog (cdtMatchingPairs);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  vector<ustring> filenames = scripture_checks_internal_generate_filenames_separate ();
  progresswindow.set_iterate (0, 1, filenames.size());
  for (vector<ustring>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) {
    progresswindow.iterate ();
    if (progresswindow.cancel)
      break;
    // The filename has been quoted already when it was generated.
    ustring command ("sc-input-usfm --categorize ");
    command += *filename;
    command += " | sc-matching-pairs";
    // The ignore option is left out completely when nothing is configured.
    if (!genconfig.check_matching_pairs_ignore().empty())
      command += " --ignore '" + genconfig.check_matching_pairs_ignore() + "'";
    command += " >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile());
    command += "2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile());
    system (command.c_str());
  }
  scripture_checks_internal_display_results (liststore, treeview, treecolumn);
}


void scripture_checks_unwanted_words (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the unwanted-words dialog, pipes every
// selected book through sc-input-usfm and sc-unwanted-words (which gets the
// project's unwanted words file), and shows the collected messages in the
// given list store / tree view.
{
  GeneralConfiguration genconfig (0);
  {
    CheckDialog dialog (cdtWordsUnwanted);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  ProgressWindow progresswindow ("", true);
  scripture_checks_internal_unlink_files ();
  vector<ustring> filenames = scripture_checks_internal_generate_filenames_separate ();
  progresswindow.set_iterate (0, 1, filenames.size());
  for (vector<ustring>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) {
    progresswindow.iterate ();
    if (progresswindow.cancel)
      break;
    // The filename has been quoted already when it was generated.
    ustring command ("sc-input-usfm --categorize ");
    command += *filename;
    command += " | sc-unwanted-words '" + checks_unwanted_words_get_filename (genconfig.project()) + "'";
    command += " >>" + shell_quote_space (scripture_checks_internal_temporal_outputfile());
    command += "2>>" + shell_quote_space (scripture_checks_internal_temporal_errorfile());
    system (command.c_str());
  }
  scripture_checks_internal_display_results (liststore, treeview, treecolumn);
}


void scripture_checks_word_inventory (GtkListStore * liststore, GtkWidget * treeview, GtkTreeViewColumn * treecolumn)
// Asks the user to confirm through the words-count dialog, runs
// CheckCountWords over the selected books of the current project, and shows
// the words and counts in a "Word Inventory" results display.
// The three GTK parameters are not used by this function.
{
  {
    CheckDialog dialog (cdtWordsCount);
    if (dialog.run() != GTK_RESPONSE_OK)
      return;
  }
  GeneralConfiguration genconfig (0);
  Session session (0);
  // The session's sort type is passed on as two separate flags.
  bool sort_type_1 = (session.checksorttype () == cstSort1);
  bool sort_type_2 = (session.checksorttype () == cstSort2);
  CheckCountWords check (genconfig.project(),
                         checks_generate_booknames (),
                         genconfig.check_words_inventory_word_forming_characters(),
                         sort_type_1,
                         sort_type_2,
                         genconfig.check_words_inventory_not_include_words_count (),
                         true);
  DisplayCheckingResults display ("Word Inventory");
  display.word_inventory (check.words,
                          check.counts,
                          check.total_count,
                          check.total_unique_count,
                          check.filtered_count,
                          check.filtered_unique_count,
                          genconfig.check_words_inventory_not_include_words_count ());
}


ustring checks_abbreviations_get_filename (const ustring& project)
// Builds the filename "abbreviations" below the given project in the
// projects directory.
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "abbreviations");
  return filename;
}


ustring checks_uncapitalized_prefixes_get_filename (const ustring& project)
// Builds the filename "uncapitalized-prefixes" below the given project in
// the projects directory.
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "uncapitalized-prefixes");
  return filename;
}


ustring checks_capitalized_suffixes_get_filename (const ustring& project)
// Builds the filename "capitalized_suffixes" below the given project in the
// projects directory.
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "capitalized_suffixes");
  return filename;
}


ustring checks_repetition_show_only_get_filename (const ustring& project)
// Builds the filename "repetitions_show_only" below the given project in the
// projects directory.
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "repetitions_show_only");
  return filename;
}


ustring checks_repetition_ignore_get_filename (const ustring& project)
// Builds the filename "repetitions_ignore" below the given project in the
// projects directory.
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "repetitions_ignore");
  return filename;
}


ustring checks_unwanted_patterns_get_filename (const ustring& project)
// Builds the filename "unwanted_patterns" below the given project in the
// projects directory.
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "unwanted_patterns");
  return filename;
}


ustring checks_unwanted_words_get_filename (const ustring& project)
// Builds the filename "unwanted_words" below the given project in the
// projects directory.
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "unwanted_words");
  return filename;
}


/*

Some checks from Umhloli.

procedure CheckAllowedCharacters (USFM : TUSFM; Curr : char);
procedure CheckSentenceStructure (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
procedure CheckBracketStructure (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
procedure CheckAdjacentCharacters (USFM : TUSFM; Curr : char);
procedure InitializeQuotationMarks;
procedure CheckQuotationMarks (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
procedure CheckForAndRemoveTabsAndDoubleSpaces (Name : string);
procedure InitializeCharacterSets;
procedure CheckCommaSemicolonColon (USFM : TUSFM; Curr : char);
procedure InitializeCommaSemicolonColon;
procedure InitializeSentenceStructure (InitIt : boolean);
procedure InitializeUnwantedWords;
procedure CheckUnwantedWords (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
procedure CheckLeadingSpaces (USFM : TUSFM; Curr : char);


const
  NumberChars : set of char = ['0' .. '9'];

var
  SentenceEnders : set of char;
  SentenceIndifference : set of char;
  SmallLetters : set of char;
  Capitals : set of char;
  NonAdjacentChars : set of char;
  RightOfQuoteChars : set of char;
  AllowedCharacters : set of char;
  SentenceCutterFound : boolean;
  FilterOutChars : set of char;
  PreviousSFMHeading : boolean;
  PreviousSFMHeadingNumber : integer;
  HeadingText : string;
  SpaceFound : boolean;


implementation

uses
  MyFeedback, Utilities, MyRegistry, UnitMain, SysUtils,
  Classes, NamesList, UnwantedWordsUnit, TypesConstants, ScripturesUnit;

type
  BF = record
    Opener : char;
    Closer : char;
    Place : string;
  end;
  BC = record
    Beginning : char;
    Ending : char;
  end;


const
  SentenceStarters = ['A'..'Z'];
  MaxBracketLevel = 5;

var
  SentenceHasStarted : boolean;
  BracketFound : array [1..MaxBracketLevel] of BF;
  BracketLevel : integer;
  BracketChars : array [1..10] of BC;
  NumberOfBracketsToCheck : integer;
  OpeningBracketWasThere : boolean;
  QuotationMarksFound : integer;
  CommaSemicolonColonFound : boolean;
  NamesStringList : TStringList;
  CapitalsBuildingUp : boolean;
  CapitalizedWord : string;
  SentenceEndedAtPreviousCharacter : boolean;
  UnwantedWordsList, UnwantedTicksList, UnwantedCommentsList : TStringList;
  UnwantedSearchPhrase, UnwantedSearchWord : string;


procedure InitializeSentenceStructure (InitIt : boolean);
var
  Scripture : TScripture;
begin
  if InitIt then
  begin
    SentenceHasStarted := false;
    CapitalsBuildingUp := false;
    CapitalizedWord := '';
    NamesStringList := TStringList.Create;
    NamesStringList.Sorted := true;
    NamesStringList.CaseSensitive := true;
    Scripture := TScripture.Create(nil, -1);
    if FileExists (Scripture.UmhloliDataPath + ApprovedNamesTxt) then
      NamesStringList.LoadFromFile(Scripture.UmhloliDataPath + ApprovedNamesTxt);
    Scripture.Free;
    PreviousSFMHeading := false;
    PreviousSFMHeadingNumber := 0;
    SentenceEndedAtPreviousCharacter := false;
  end
  else
  begin
    NamesStringList.Free;
  end;
end;



procedure CheckSentenceStructure (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
var
  i : integer;
begin
  // No checking of sentence structure when an indifferent character is found.
  if not (Curr in SentenceIndifference) then
  begin
    if (Curr in SentenceEnders) and (not SentenceHasStarted) then
    begin
      AddResultMessage (USFM, USFM.PreviousChars + ' - A character ' + Curr + ' ends the sentence which hasn''t been started yet.');
    end;
    if (not (Curr in Capitals)) and (Curr <> ' ') and (not SentenceHasStarted) then
    begin
      AddResultMessage (USFM, USFM.PreviousChars + ' - A sentence should not start with ' + Curr + ' but with a capital.');
      SentenceHasStarted := true;
    end;

    // Solve problems like characters following immediately after a
    // sentence ender.
    if SentenceEndedAtPreviousCharacter then
    begin
      if Curr <> ' ' then
        AddResultMessage (USFM, USFM.PreviousChars + ' - Invalid character (' + Curr + ') found straight after a sentence closer.');
      SentenceEndedAtPreviousCharacter := false;
    end;

    // No spaces allowed before a sentence ender.
    if Curr in SentenceEnders then
    begin
      if USFM.PreviousCharacter (1) = ' ' then
        AddResultMessage (USFM, USFM.PreviousChars + ' - No space allowed before a character ending a sentence.');
    end;




    if USFM.HeadingNow then
      if Curr <> ' ' then HeadingText := HeadingText + Curr;
    if (USFM.HeadingNow <> PreviousSFMHeading)
      or (USFM.HeadingNumber <> PreviousSFMHeadingNumber) then
    begin
      if USFM.HeadingNow then
      // Heading starts here.
      begin
        if SentenceHasStarted then
          AddResultMessage (USFM, USFM.PreviousChars + ' - A heading started whereas the running sentence has not yet been finished.');
      end
      else
      // Heading ends here.
      begin
        if HeadingText <> '' then
          if not SentenceHasStarted then
            AddResultMessage (USFM, USFM.PreviousChars + ' - Heading should start with a capital, and not end with a full stop.');
        SentenceHasStarted := false;
      end;
      PreviousSFMHeading := USFM.HeadingNow;
      PreviousSFMHeadingNumber := USFM.HeadingNumber;
    end;
    if not USFM.HeadingNow then HeadingText := '';


    if USFM.NewParagraphStarted then
    begin
      if SentenceHasStarted then
        AddResultMessage (USFM, USFM.PreviousChars + ' - A new paragraph started whereas the running sentence has not yet been finished.');
      USFM.NewParagraphStarted := false;
    end;

  end;


  // Capitals are only allowed in ALL CAPS words and in names.
  if (Curr in Capitals) and SentenceHasStarted then CapitalsBuildingUp := true;
  if CapitalsBuildingUp then
  begin
    if ((Curr in Capitals) or (Curr in SmallLetters) or (Curr in FilterOutChars))
      and (Curr <> '-') then
      CapitalizedWord := CapitalizedWord + Curr
    else
    begin
      // Word with capital(s) found.
      CapitalsBuildingUp := false;
      // Skip ALL CAPS words.
      if UpperCase (CapitalizedWord) <> CapitalizedWord then
      begin
        if CapitalizedWord [Length (CapitalizedWord)] in FilterOutChars then
          Delete (CapitalizedWord, Length (CapitalizedWord), 1);
        if not NamesStringList.Find (CapitalizedWord, i) then
        begin
          AddResultMessage (USFM, USFM.PreviousChars + ' - A capitalized word (' + CapitalizedWord + ') was found within a sentence.');
          AddResultMessage (USFM, 'Hint: If this capitalized word is a name, add it to the names list.');
        end;
      end;
      CapitalizedWord := '';
    end;
  end;



  if not (Curr in SentenceIndifference) then
  begin
    if Curr in SentenceEnders then
    begin
      SentenceHasStarted := false;
      SentenceEndedAtPreviousCharacter := true;
    end;
    if (Curr in SentenceStarters) and (not USFM.TitleNow) then
      SentenceHasStarted := true;
  end;


  // End-of-book check.
  if TidyUp and SentenceHasStarted then
    AddResultMessage (USFM, USFM.PreviousChars + ' - The sentence has not been ended properly at the end of the book.');

end;


procedure CheckBracketStructure (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
var
  C : integer;

begin

  If OpeningBracketWasThere then
  begin
    OpeningBracketWasThere := false;
    if Curr = ' ' then
    begin
      AddResultMessage (USFM, USFM.PreviousChars + ' - A space has been found right after an opening bracket.');
    end;
  end;

  For C := 1 to NumberOfBracketsToCheck do
  begin
    if Curr = BracketChars [C].Beginning then
    begin
      OpeningBracketWasThere := true;
      // Next line comes in useful when lots of wild brackets are in texts
      // being checked which are not yet proofread. 
      if BracketLevel < 0 then BracketLevel := 0;
      Inc (BracketLevel);
      if (BracketLevel <= MaxBracketLevel) and (BracketLevel > 0) then
      begin
        BracketFound [BracketLevel].Opener := Curr;
        BracketFound [BracketLevel].Closer := BracketChars [C].Ending;
        BracketFound [BracketLevel].Place := USFM.CurrentPlace + USFM.PreviousChars;
      end
      else
      begin
        AddResultMessage (USFM, USFM.PreviousChars + ' - Too many opening brackets nested: ' + Curr);
      end;
    end;
  end;

  For C := 1 to NumberOfBracketsToCheck do
  begin
    if Curr = BracketChars [C].Ending then
    begin
      if USFM.PreviousCharacter (1) = ' ' then
      begin
        AddResultMessage (USFM, USFM.PreviousChars + ' - A space has been found right before a closing bracket.');
      end;
      if (BracketLevel <= MaxBracketLevel) and (BracketLevel > 0) then
      begin
        if Curr = BracketFound [BracketLevel].Closer then
        begin
        end
        else
        begin
          AddResultMessage (USFM, USFM.PreviousChars + ' - Found a closing bracket ' + Curr + ' which does not match its opener...');
          AddResultMessage (USFM, BracketFound [BracketLevel].Place + ' - ... and here is the mismatched opener talked about in the previous message.', false);
        end;
      end
      else
      begin
        AddResultMessage (USFM, USFM.PreviousChars + ' - A closing bracket found without matching opener: ' + Curr);
      end;
      Dec (BracketLevel);
      // Next line makes sure that wild numbers of brackets don't disturb the
      // checking too much.
      if BracketLevel < 0 then BracketLevel := 0;
    end;
  end;

  if TidyUp then
  begin
    if BracketLevel > MaxBracketLevel then BracketLevel := MaxBracketLevel;
    For C := 1 to BracketLevel do
    begin
      AddResultMessage (USFM, BracketFound [C].Place + ' - An opening bracket without matching closer has been found: ' + BracketFound [C].Opener, false);
    end;
    BracketLevel := 0;
  end;

end;


procedure CheckAdjacentCharacters (USFM : TUSFM; Curr : char);
begin
  if Curr in NonAdjacentChars then
    if USFM.PreviousCharacter (1) in NonAdjacentChars then
      AddResultMessage (USFM, USFM.PreviousChars + ' - Two characters found which are not supposed to be adjacent to each other: ' + USFM.PreviousCharacter (1) + ' and ' + Curr);
end;




procedure InitializeQuotationMarks;
begin
  QuotationMarksFound := 0;
end;


procedure CheckQuotationMarks (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
begin
  if Curr = '"' then
  begin
    Inc (QuotationMarksFound);
    if (QuotationMarksFound mod 2) = 1 then
    begin
      if USFM.PreviousCharacter (1) <> ' ' then
      begin
        AddResultMessage (USFM, USFM.PreviousChars + ' - A space is supposed to be found left to an opening quotation mark, but another character was found: ' + USFM.PreviousCharacter (1));
      end;
    end;
  end;
  if (not (Curr in RightOfQuoteChars))
    and (Curr <> ' ')
    and ((QuotationMarksFound mod 2) = 0)
    and (USFM.PreviousCharacter (1) = '"') then
      AddResultMessage (USFM, USFM.PreviousChars + ' - An invalid character is found next to a closing quotation mark: ' + Curr);
  if TidyUp then
  begin
    if (QuotationMarksFound mod 2) <> 0 then
    begin
      AddResultMessage (USFM, 'Having reached the end of the book, it appears that one or more quotation marks have not been closed.');
    end;
  end;
end;



procedure CheckCommaSemicolonColon (USFM : TUSFM; Curr : char);
{ After a comma, semicolon or colon, these rules apply:
- there should be one of the following characters:
  1. a closing bracket
  2. a space
  3. a quotation mark
- if not, a message is written.
}
var
  TheRightCharFound : boolean;
  i : integer;
begin
  if SentenceCutterFound then
  begin
    TheRightCharFound := false;
    for i := 1 to NumberOfBracketsToCheck do
      if Curr = BracketChars [i].Ending then TheRightCharFound := true;
    if Curr = ' ' then TheRightCharFound := true;
    if Curr = '"' then TheRightCharFound := true;
    if not TheRightCharFound then
      AddResultMessage (USFM, USFM.PreviousChars + ' - A sentence divider is followed by an invalid character: ' + Curr);
    SentenceCutterFound := false;
  end;

  if Curr in [',', ';', ':'] then
  begin
    SentenceCutterFound := true;
  end;

end;


procedure CheckUnwantedWords (USFM : TUSFM; Curr : char; TidyUp : boolean = false);
{ Feeds one character of text into the search for unwanted words and phrases.

  There are two types of checks:
  1. Words. Characters in Capitals, SmallLetters or FilterOutChars are
     collected in UnwantedSearchWord. Any other character ends the word, which
     is then looked up in UnwantedWordsList and reported if its entry in
     UnwantedTicksList equals UnwantedWordSearch.
  2. Phrases. Every character is appended in lower case to the sliding window
     UnwantedSearchPhrase while its first character is dropped, so the window
     keeps its length. The place and preceding text of each character are
     stored in UnwantedRefs and UnwantedPrevs. Once more than UnwantedRefsSize
     characters have been stored, or when TidyUp is set, the window is searched
     for every entry whose tick equals UnwantedPhraseSearch.

  USFM   : supplies PreviousChars and CurrentPlace for the messages.
  Curr   : the character to process.
  TidyUp : forces the phrase search and re-initializes the system afterwards.

  The phrase comparison is done in lower case throughout. Previously the
  extracted text, which is always lower case, was compared with the raw list
  entry, so a configured phrase containing a capital letter was never reported.
}
var
  i, i2 : integer;
  S, Phrase : string;
begin

  // Check for complete words.
  if (Curr in Capitals) or (Curr in SmallLetters) or (Curr in FilterOutChars) then
    UnwantedSearchWord := UnwantedSearchWord + Curr
  else
  begin
    if UnwantedSearchWord <> '' then
    begin
      i := UnwantedWordsList.IndexOf (LowerCase (UnwantedSearchWord));
      if i >= 0 then
      begin
        if UnwantedTicksList.Strings [i] = UnwantedWordSearch then
          AddResultMessage (USFM, USFM.PreviousChars + ' - An unwanted word (' + UnwantedSearchWord + ') has been found. Comment: ' + UnwantedCommentsList.Strings [i] + '.');
      end;
      UnwantedSearchWord := '';
    end;
  end;

  // Fill the search string and the reference array.
  UnwantedSearchPhrase := UnwantedSearchPhrase + LowerCase (Curr);
  Delete (UnwantedSearchPhrase, 1, 1);
  UnwantedRefs [UnwantedRefsPointer] := USFM.CurrentPlace;
  UnwantedPrevs [UnwantedRefsPointer] := USFM.PreviousChars;
  Inc (UnwantedRefsPointer);

  // If the limit has been reached, check for search phrases, if they exist.
  if (UnwantedRefsPointer > UnwantedRefsSize) or TidyUp then
  begin
    for i := 0 to UnwantedWordsList.Count - 1 do
    begin
      if UnwantedTicksList.Strings [i] = UnwantedPhraseSearch then
      begin
        // The window only holds lower case text, so search in lower case.
        Phrase := LowerCase (UnwantedWordsList [i]);
        if Pos (Phrase, UnwantedSearchPhrase) > 0 then
        begin
          // The phrase is somewhere in the window. Look at each character
          // stored since the previous search: the last (UnwantedRefsPointer - 1)
          // characters of the window belong to those stored places. Extract
          // the text that ends at that character and compare it.
          for i2 := 1 to UnwantedRefsPointer - 1 do
          begin
            S := Copy (UnwantedSearchPhrase, UnwantedPhraseMaxLength - UnwantedRefsPointer + 1 + i2 - Length (Phrase) + 1, Length (Phrase));
            if S = Phrase then
            begin
              // NOTE(review): the meaning of the trailing "false" argument is
              // defined by AddResultMessage, which is not visible here.
              AddResultMessage (USFM, UnwantedRefs [i2] + ' ' + UnwantedPrevs [i2] + ' - An unwanted phrase (' + S + ') has been found. Comment: ' + UnwantedCommentsList.Strings [i] + '.', false);
            end;
          end;
        end;
      end;
    end;
    UnwantedRefsPointer := 1;
  end;

  // Initialize system for next search.
  if TidyUp then InitializeUnwantedWords;
end;


procedure InitializeCommaSemicolonColon;
{ Resets the state of the comma / semicolon / colon check, so that the first
  character processed afterwards is not treated as following a divider.
}
begin
  CommaSemicolonColonFound := false;
  // CheckCommaSemicolonColon keeps its state in SentenceCutterFound, so that
  // flag has to be cleared as well for the reset to have any effect on it.
  SentenceCutterFound := false;
end;





procedure CheckForAndRemoveTabsAndDoubleSpaces (Name : string);
{ Cleans up the whitespace in the text file called Name.

  First pass: the file is read to find out whether any line has leading or
  trailing characters that Trim removes, contains a double space, or contains
  a tab. A feedback message is added for each kind of problem found.

  Second pass, only if something was found: every line is trimmed, tabs are
  replaced by spaces, runs of spaces are reduced to one space, and the result
  is written to a temporary file in ProgramPath which then replaces the
  original file.

  The files are closed in "finally" sections so that no handle stays open when
  reading or writing raises an exception, and a failure to replace the original
  file is reported instead of being ignored.
}
var
  F1, F2 : Textfile;
  S, TempName : String;
  i : integer;
  Trimmed, DoubleSpaceFound, TabCharFound : boolean;

begin
  Trimmed := false;
  DoubleSpaceFound := false;
  TabCharFound := false;

  // First pass: only detect what needs to be cleaned.
  AssignFile (F1, Name);
  Reset (F1);
  try
    while not Eof (F1) do
    begin
      Readln (F1, S);
      if Trim (S) <> S then Trimmed := true;
      if Pos ('  ', S) > 0 then DoubleSpaceFound := true;
      if Pos (TabChar, S) > 0 then TabCharFound := true;
    end;
  finally
    CloseFile (F1);
  end;

  if Trimmed then
    AddFeedbackMessage ('Unnecessary characters at the beginning or the end of the line have been removed from ' + Name);
  if DoubleSpaceFound then
    AddFeedbackMessage ('Double spaces have been removed from  ' + Name);
  if TabCharFound then
    AddFeedbackMessage ('Tabs have been removed from  ' + Name);

  if Trimmed or DoubleSpaceFound or TabCharFound then
  begin
    // Second pass: write the cleaned lines to a temporary file.
    TempName := ProgramPath + 'tabspace.tmp';
    AssignFile (F1, Name);
    Reset (F1);
    try
      AssignFile (F2, TempName);
      Rewrite (F2);
      try
        while not Eof (F1) do
        begin
          Readln (F1, S);
          S := Trim (S);
          // Turn every tab into a space ...
          repeat
            i := Pos (TabChar, S);
            if i > 0 then S [i] := ' ';
          until i = 0;
          // ... and then reduce each run of spaces to a single space.
          repeat
            i := Pos ('  ', S);
            if i > 0 then Delete (S, i, 1);
          until i = 0;
          WriteLn (F2, S);
        end;
      finally
        CloseFile (F2);
      end;
    finally
      CloseFile (F1);
    end;

    // Put the cleaned file in the place of the original one. If the original
    // cannot be deleted, the rename is not attempted and the temporary file
    // is left behind.
    if not (DeleteFile (Name) and RenameFile (TempName, Name)) then
      AddFeedbackMessage ('The cleaned text could not be written back to ' + Name);
  end;
end;



procedure InitializeCharacterSets;
{ Loads the character sets and the bracket pairs used by the checks, and
  resets the state flags that depend on them.

  - The bracket pairs are read from "brackets.txt" in ProgramPath. Each line
    has to consist of exactly two characters: the opening and the closing
    bracket. Other lines are reported through AddFeedbackMessage.
  - The character sets are read through RegistryReadKey and converted with
    StringToSet.
  - AllowedCharacters is the union of all these sets, the brackets, the
    quotation mark, the backslash and the space.

  The brackets are loaded before AllowedCharacters is built. Previously the
  set was built first, from whatever BracketChars and NumberOfBracketsToCheck
  held before this call, so the brackets of the file just loaded were not
  taken into account.
}
var
  F1 : Textfile;
  S : String;
  i : integer;
begin
  // Initialize brackets.
  BracketLevel := 0;
  NumberOfBracketsToCheck := 0;
  OpeningBracketWasThere := false;
  AssignFile (F1, ProgramPath + 'brackets.txt');
  Reset (F1);
  try
    while not Eof (F1) do
    begin
      Readln (F1, S);
      if Length (S) = 2 then
      begin
        Inc (NumberOfBracketsToCheck);
        BracketChars [NumberOfBracketsToCheck].Beginning := S[1];
        BracketChars [NumberOfBracketsToCheck].Ending := S[2];
      end
      else AddFeedbackMessage ('While reading the brackets to check for, an invalid set of brackets has been found: ' + S);
    end;
  finally
    // Close the file even if reading it raised an exception.
    CloseFile (F1);
  end;

  // Load the configurable character sets.
  SentenceEnders := StringToSet (RegistryReadKey (SentenceEndersKey));
  SentenceIndifference := StringToSet (RegistryReadKey (SentenceIndifferenceKey));
  SmallLetters := StringToSet (RegistryReadKey (SmallLettersKey));
  Capitals := StringToSet (RegistryReadKey (CapitalsKey));
  NonAdjacentChars := StringToSet (RegistryReadKey (NotAdjacentKey));
  RightOfQuoteChars := StringtoSet (RegistryReadKey (RightQuotesKey));
  FilterOutChars := StringToSet (RegistryReadKey (CharsFilteredOutKey));

  // Everything that may occur in the text: the brackets loaded above, the
  // quotation mark, all configured sets, the backslash and the space.
  AllowedCharacters := [];
  for i := 1 to NumberOfBracketsToCheck do
  begin
    AllowedCharacters := AllowedCharacters
      + [BracketChars [i].Beginning] + [BracketChars [i].Ending];
  end;
  AllowedCharacters := AllowedCharacters + ['"'];
  AllowedCharacters := AllowedCharacters + SentenceEnders
    + SentenceIndifference + SmallLetters + Capitals
    + NonAdjacentChars + RightOfQuoteChars + ['\'] + [' ']
    + StringToSet (RegistryReadKey (ExtraCharsInTextKey));

  // Reset the state of the character based checks.
  SentenceCutterFound := false;
  SpaceFound := false;
end;


procedure InitializeUnwantedWords;
{ Resets the unwanted words check and reloads its configuration.

  The search word is emptied, the phrase window is filled with
  UnwantedPhraseMaxLength spaces, and the stored references are cleared.

  The configuration is read from the file UnwantedWordsTxt in the data path
  of a TScripture object, if that file exists. Each line is parsed as

    word-or-phrase (tick) (comment)

  and split over UnwantedWordsList, UnwantedTicksList and UnwantedCommentsList,
  which therefore always have the same number of entries.

  The temporary objects are released in "finally" sections so that they do not
  leak when loading raises an exception, and the remainder of a line is taken
  with MaxInt instead of a fixed 1000 characters so that long comments are not
  cut off.
}
var
  TList : TStringList;
  i, i2 : integer;
  S, S2 : string;
  Scripture : TScripture;
begin
  UnwantedSearchWord := '';
  UnwantedSearchPhrase := StringOfChar (' ', UnwantedPhraseMaxLength);
  TList := TStringList.Create;
  try
    // NOTE(review): the TScripture object is only used here to obtain the
    // data path; the meaning of its constructor arguments is not visible here.
    Scripture := TScripture.Create (Nil, -1);
    try
      if FileExists (Scripture.UmhloliDataPath + UnwantedWordsTxt) then
        TList.LoadFromFile (Scripture.UmhloliDataPath + UnwantedWordsTxt);
    finally
      Scripture.Free;
    end;
    UnwantedWordsList.Clear;
    UnwantedTicksList.Clear;
    UnwantedCommentsList.Clear;
    for i := 0 to TList.Count - 1 do
    begin
      S := TList.Strings [i];
      // Everything before the first " (" is the word or phrase.
      i2 := Pos (' (', S);
      S2 := Copy (S, 1, i2 - 1);
      UnwantedWordsList.Add (S2);
      // S now starts with " (tick) (comment)".
      S := Copy (S, i2, MaxInt);
      i2 := Pos (') (', S);
      // The tick sits between the leading " (" and the ") (" separator.
      S2 := Copy (S, 3, i2 - 3);
      UnwantedTicksList.Add (S2);
      // The comment is the rest of the line, without the closing bracket.
      S2 := Copy (S, i2 + 3, MaxInt);
      Delete (S2, Length (S2), 1);
      UnwantedCommentsList.Add (S2);
    end;
  finally
    TList.Free;
  end;
  for i := 1 to UnwantedRefsSize do
  begin
    UnwantedRefs [i] := '';
    UnwantedPrevs [i] := '';
  end;
  UnwantedRefsPointer := 1;
end;



procedure CheckLeadingSpaces (USFM : TUSFM; Curr : char);
{ Reports punctuation that is preceded by a space.

  A colon, comma, semicolon, question mark or exclamation mark should follow
  the preceding word directly. The following sentence, for instance, has a
  space before the colon and is reported:

    And he said to her : How do you do?

  SpaceFound remembers between calls whether the previous character was a
  space.
}
begin
  if SpaceFound then
    case Curr of
      ':', ',', ';', '?', '!' :
        AddResultMessage (USFM, USFM.PreviousChars + ' - A space precedes this punctuation character: ' + Curr);
    end;

  // The flag always ends up reflecting the current character only.
  SpaceFound := Curr = ' ';
end;



initialization
  // The three lists run in parallel: entry i of each list describes the same
  // unwanted word or phrase (its text, its tick and its comment).
  UnwantedCommentsList := TStringList.Create;
  UnwantedTicksList := TStringList.Create;
  UnwantedWordsList := TStringList.Create;

finalization
  // Release the lists that were created when the unit was initialized.
  UnwantedCommentsList.Free;
  UnwantedTicksList.Free;
  UnwantedWordsList.Free;

end.

*/

Generated by  Doxygen 1.6.0   Back to index