Logo Search packages:      
Sourcecode: bibledit version File versions  Download package

repetition.cpp

/*
    Copyright (C) 2003-2006 Teus Benschop.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/


#include "libraries.h"
#include "utilities.h"
#include "constants.h"
#include <gtk/gtk.h>
#include <fnmatch.h>
#include <string>


// Reference information, i.e. "where are we?"
// Updated by start_element_handler() from the first attribute of the book,
// chapter and verse elements, and passed along with every message reported
// by check_repetitions().
ustring book;
int chapter;
ustring verse;
// Containers for commandline arguments.
// --ignore <filename>: sets 'ignore'; repeated words found in 'ignores' are
// not reported.
bool ignore = false;
set<ustring> ignores;
// --show-only <filename>: sets 'show_only'; only repeated words found in
// 'show_onlies' are reported.
bool show_only = false;
set<ustring> show_onlies;
// --ignore-case: words are casefolded before they are compared.
bool ignore_case = false;
// Location information.
// Name of the element most recently opened by the markup parser.
ustring current_element;
// Verse text information.
// NOTE(review): none of these four is referenced anywhere else in this file;
// presumably leftovers from a sibling checking tool - confirm before removing.
vector<int> verse_chapter;
vector<ustring> verse_verse;
vector<size_t> verse_pointer;
ustring verse_text;


ustring get_context (GtkTextIter iter)
// Returns the context at iter: A couple of words before and after.
// The returned text runs from two word starts before iter up to two word
// ends after iter. The iterator is taken by value, so the caller's iterator
// is left where it was.
{
  GtkTextIter iter1 = iter;
  GtkTextIter iter2 = iter;
  gtk_text_iter_backward_word_starts (&iter1, 2);
  gtk_text_iter_forward_word_ends (&iter2, 2);
  // gtk_text_iter_get_text() hands back a newly allocated string that the
  // caller owns: copy it into the result and release it, else it leaks.
  gchar *raw = gtk_text_iter_get_text (&iter1, &iter2);
  ustring context;
  if (raw) {
    context = raw;
    g_free (raw);
  }
  return context;
}


void check_repetitions (ustring& text)
/*
Checks on repeated whole words.
Output any found, under certain conditions.

text: the text to check. It is copied before use and not modified.

Two neighbouring words count as a repetition when they are equal, or, with
ignore_case set, when their casefolded forms are equal.
A repetition is reported through output_xml_message() for the current
book / chapter / verse, unless:
- show_only is set and the word is not in show_onlies, or
- ignore is set and the word is in ignores.
*/
{
  // Load text into buffer, so that GTK's word boundary logic can be used.
  ustring text2 (text);
  // NOTE(review): presumably the trailing space ensures that the last word
  // is followed by a boundary - confirm before removing.
  text2.append (" ");
  GtkTextBuffer * textbuffer;
  textbuffer = gtk_text_buffer_new (NULL);
  gtk_text_buffer_set_text (textbuffer, text2.c_str(), -1);
  // Iterators.
  GtkTextIter startiter, enditer;
  // Check all separate words.
  ustring previous_word;
  gtk_text_buffer_get_start_iter (textbuffer, &enditer);
  while (gtk_text_iter_forward_word_end (&enditer)) {
    startiter = enditer;
    gtk_text_iter_backward_word_start (&startiter);
    // gtk_text_iter_get_text() returns a newly allocated string owned by the
    // caller: copy it and free it right away, else every word leaks.
    gchar *raw_word = gtk_text_iter_get_text (&startiter, &enditer);
    ustring word;
    if (raw_word) {
      word = raw_word;
      g_free (raw_word);
    }
    // Deal with ignoring the case.
    if (ignore_case)
      word = word.casefold();
    // See if it is the same as the previous one.
    if (word == previous_word) {
      // The 'continue's below skip the store at the end of the loop; that is
      // harmless because word equals previous_word at this point.
      // Do we show this one?
      if (show_only) {
        if (show_onlies.find (word) == show_onlies.end ())
          continue;
      }
      // Do we ignore this one?
      if (ignore) {
        if (ignores.find (word) != ignores.end())
          continue;
      }
      // Give message.
      ustring message = "Repeated: ";
      message.append (get_context (startiter));
      output_xml_message (book, chapter, verse, message);
    }
    // Store word for next cycle.
    previous_word = word;
  }
  // Free memory
  g_object_unref (textbuffer);
}


void start_element_handler (GMarkupParseContext *context,
                            const gchar         *element_name,
                            const gchar        **attribute_names,
                            const gchar        **attribute_values,
                            gpointer             user_data,
                            GError             **error)
// Callback of the markup parser for an opening tag.
// Remembers the element's name in current_element, and, for the book, chapter
// and verse elements, stores the value of the element's first attribute as
// the current reference.
// An element that carries no attribute leaves the reference unchanged.
{
  current_element = element_name;
  // The attribute arrays are NULL-terminated, so the first entry is NULL when
  // the element has no attributes. Assigning NULL to a string is undefined
  // behaviour, hence the guard.
  const gchar *first_value = attribute_values ? attribute_values[0] : NULL;
  if (!first_value)
    return;
  if (current_element == BOOK_TAG) {
    book = first_value;
  }
  else if (current_element == CHAPTER_TAG) {
    chapter = convert_to_int (first_value);
  }
  else if (current_element == VERSE_TAG ) {
    verse = first_value;
  }
}


void end_element_handler (GMarkupParseContext *context,
                          const gchar         *element_name,
                          gpointer             user_data,
                          GError             **error)
// Callback of the markup parser for a closing tag.
// Does nothing; it only fills the corresponding slot of the GMarkupParser
// table that main() sets up.
{
}


void text_handler (GMarkupParseContext *context,
                   const gchar         *text,
                   gsize                text_len,
                   gpointer             user_data,
                   GError             **error)
// Callback of the markup parser for character data.
// Trims the text and, when anything is left, checks it for repeated words.
{
  // The parser passes text that is not nul-terminated, so exactly text_len
  // bytes are copied. A byte count is needed here, hence the std::string in
  // between.
  std::string raw (text, text_len);
  ustring utext (raw);
  utext = trim (utext);
  if (utext.empty())
    return;
  check_repetitions (utext);
}



void passthrough_handler    (GMarkupParseContext *context,
                             const gchar         *passthrough_text,
                             gsize                text_len,
                             gpointer             user_data,
                             GError             **error)
// Callback of the markup parser for text it passes through unparsed.
// Does nothing; it only fills the corresponding slot of the GMarkupParser
// table that main() sets up.
{
}


void error_handler          (GMarkupParseContext *context,
                             GError              *error,
                             gpointer             user_data)
// Callback of the markup parser for a parse error.
// Writes the error's message, followed by a newline, to stderr.
{
  const gchar *description = error->message;
  cerr << description;
  cerr << endl;
}


int main (int argc, char *argv[])
{
  // Initialize GTK
  gtk_init (&argc, &argv);
  // Process command line arguments.
  for (int i = 1; i < argc; i++) {
    ustring argument;
    argument = argv[i];
    if (argument.length() > 2) {
      if (argument.substr (0, 2) == "--") {
        argument.erase (0, 2);
        if (argument == "ignore") {
          ignore = true;
          argument = argv[++i];
          ReadText rt (argument, true);
          for (unsigned int i2 = 0; i2 < rt.lines.size(); i2++) {
            ignores.insert (rt.lines[i2]);
          }
        }
        if (argument == "show-only") {
          show_only = true;
          argument = argv[++i];
          ReadText rt (argument, true);
          for (unsigned int i2 = 0; i2 < rt.lines.size(); i2++) {
            show_onlies.insert (rt.lines[i2]);
          }
        }
        if (argument == "ignore-case") {
          ignore_case = true;
        }
        if (argument == "help") {
          cout << "sc-repetition reads checking units from stdin, looks for" << endl;
          cout << "identical contiguous words in the text, and outputs its report on stdout." << endl;
          cout << "Optional commandline arguments:" << endl;
          cout << "--ignore <filename>" << endl;
          cout << "  A file containing repetitions to ignore, one per line" << endl;
          cout << "--show-only <filename>" << endl;
          cout << "  A file containing repetitions to show up only, one per line" << endl;
          cout << "--ignore-case" << endl;
          cout << "  Ignores the case of the words" << endl;
          return 0;
        }
      }
    }
  }
  // Read data from stdin.
  GIOChannel* io;
  gchar* text;
  gsize length;
  io = g_io_channel_unix_new (0);
  g_io_channel_read_to_end (io, &text, &length, NULL);
  // Set up parser.
  GMarkupParseContext *context;
  GMarkupParser parser = {
    start_element_handler,
    end_element_handler,
    text_handler,
    passthrough_handler,
    error_handler
  };
  // Parse xml data.
  context = g_markup_parse_context_new (&parser, GMarkupParseFlags (0), NULL, NULL);
  g_markup_parse_context_parse (context, text, length, NULL);
  g_markup_parse_context_end_parse (context, NULL);
  // Free some resources.  
  g_markup_parse_context_free (context);
  g_free (text);
  g_io_channel_unref (io);
  // Output the data.
  for (unsigned int i = 0; i < 0; i++) {
    cout << xml_tag (0, MESSAGE_TAG, false) << endl;
    ustring xml;
    xml = xml_text_embed_in_tags (1, CHARACTER_TAG, "test");
    write (1, xml.c_str(), strlen (xml.c_str()));
    write (1, "\n", 1);
    cout << xml_tag (0, MESSAGE_TAG, true) << endl;
  }
  // Ready.
  return 0;
}

Generated by  Doxygen 1.6.0   Back to index