Logo Search packages:      
Sourcecode: bibledit version File versions  Download package

matching-pairs.cpp

/*
    Copyright (C) 2003-2006 Teus Benschop.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/


#include "libraries.h"
#include "utilities.h"
#include "constants.h"
#include <gtk/gtk.h>
#include <string>


class Opener
{
public:
  Opener (const ustring& char_in, gunichar unichar_in, 
          const ustring& book_in, int chapter_in, const ustring& verse_in,
          const ustring& context_in);
  ustring character;
  gunichar unichar;
  ustring book;
  int chapter;
  ustring verse;
  ustring context;
private:
};


Opener::Opener (const ustring& char_in, gunichar unichar_in, 
                const ustring& book_in, int chapter_in, const ustring& verse_in,
                const ustring& context_in)
{
  character = char_in;
  unichar = unichar_in;
  book = book_in;
  chapter = chapter_in;
  verse = verse_in;
  context = context_in;
}


// Reference information, i.e. "where are we?"
ustring book;
int chapter;
ustring verse;
// Containers for commandline arguments.
set<gunichar> ignores;
// Location information.
ustring current_element;
// Openers and closers to check.
set<gunichar> gopeners;
set<gunichar> gclosers;
// Running matching pairs.
vector<Opener> openers;


ustring get_context (ustring& line, unsigned int offset)
// Returns the context at offset: A couple of words before and after.
{
  // Result.
  ustring returnvalue;
  // Load text into buffer.
  GtkTextBuffer * textbuffer;
  textbuffer = gtk_text_buffer_new (NULL);
  gtk_text_buffer_set_text (textbuffer, line.c_str(), -1);
  // Iterators.  
  GtkTextIter iter1;
  GtkTextIter iter2;
  // Find boundaries of context to return.
  gtk_text_buffer_get_iter_at_offset (textbuffer, &iter1, offset);
  iter2 = iter1;
  gtk_text_iter_backward_word_starts (&iter1, 2);
  gtk_text_iter_forward_word_ends (&iter2, 2);
  return gtk_text_iter_get_text (&iter1, &iter2);
  // Free memory
  g_object_unref (textbuffer);
  // Give us the result.
  return returnvalue;
}


void check_matched_pairs (ustring& text)
/*
Checks on matched pairs.
Output any problems found.
*/
{
  for (unsigned int i = 0; i < text.length(); i++) {
    // Get the unicode character;
    gunichar unichar;
    unichar = g_utf8_get_char(text.substr(i, 1).c_str());
    // If we found a mirror character, investigate further.
    gunichar mirror;
    if (g_unichar_get_mirror_char (unichar, &mirror)) {
      // Do we ignore this one?
      if (ignores.find (unichar) != ignores.end())
        continue;
      // See whether this one opens or closes a pair.
      if (gopeners.find (unichar) != gopeners.end ()) {
        // It opens: Add data.
        Opener opener (text.substr(i, 1), unichar, book, chapter, verse, get_context (text, i));
        openers.push_back (opener);
        continue;
      } else {
        // It closes: check for previously seen opener.
        bool give_message = false;
        if (openers.empty()) {
          give_message = true;
        }
        if (!give_message) {
          if (openers[openers.size() - 1].unichar == mirror) {
            // Remove last one.
            openers.pop_back();
          } else {
            // Flag message.
            give_message = true;
          }
        }
        if (give_message) {
          // Give message;
          ustring message = "Pair not opened: ";
          message.append (get_context (text, i));
          output_xml_message (book, chapter, verse, message);
        }
      }
    }
  }
}


void check_pairs_clean ()
// See if there is still any opening punctuation that have not been matched with 
// closing punctuation.
{
  // Check for them and give messages.
  for (unsigned int i = 0; i < openers.size(); i ++) {
    ustring message = "Pair not closed: ";
    message.append (openers[i].context);
    output_xml_message (openers[i].book, openers[i].chapter, openers[i].verse, message);
  }
  // Clear them up.
  openers.clear();
}


void start_element_handler (GMarkupParseContext *context,
                            const gchar         *element_name,
                            const gchar        **attribute_names,
                            const gchar        **attribute_values,
                            gpointer             user_data,
                            GError             **error)
{
  current_element = element_name;
  if (current_element == BOOK_TAG) {
    book = attribute_values[0];
  } 
  else if (current_element == CHAPTER_TAG) {
    chapter = convert_to_int (attribute_values[0]);
  } 
  else if (current_element == VERSE_TAG ) {
    verse = attribute_values[0];
  }
}


void end_element_handler (GMarkupParseContext *context,
                          const gchar         *element_name,
                          gpointer             user_data,
                          GError             **error)
{
  // At the end of each chapter, check whether all pairs are "clean"
  current_element = element_name;
  if (current_element == CHAPTER_TAG) {
    check_pairs_clean ();
  }
}


void text_handler (GMarkupParseContext *context,
                   const gchar         *text,
                   gsize                text_len,
                   gpointer             user_data,
                   GError             **error)
{
  ustring utext (text);
  utext = trim (utext);
  if (utext.empty())
    return;
  check_matched_pairs (utext);
}



void passthrough_handler    (GMarkupParseContext *context,
                             const gchar         *passthrough_text,
                             gsize                text_len,
                             gpointer             user_data,
                             GError             **error)
{
}


void error_handler          (GMarkupParseContext *context,
                             GError              *error,
                             gpointer             user_data)
{
  cerr << error->message << endl;
}


int main (int argc, char *argv[])
{
  // Initialize GTK
  gtk_init (&argc, &argv);
  // Process command line arguments.
  for (int i = 1; i < argc; i++) {
    ustring argument;
    argument = argv[i];
    if (argument.length() > 2) {
      if (argument.substr (0, 2) == "--") {
        argument.erase (0, 2);
        if (argument == "ignore") {
          argument = argv[++i];
          for (unsigned int i2 = 0; i2 < argument.length(); i2++) {
            gunichar unichar = g_utf8_get_char(argument.substr(i2, 1).c_str());
            ignores.insert (unichar);
            gunichar mirror;
            if (g_unichar_get_mirror_char (unichar, &mirror)) {
              ignores.insert (mirror);
            }
          }
        }
        if (argument == "help") {
          cout << "sc-matched-pairs reads checking units from stdin, checks that" << endl;
          cout << "opening and closing punctuation match and are in the proper order," << endl;
          cout << "and outputs its report on stdout." << endl;
          cout << "Optional commandline arguments:" << endl;
          cout << "--ignore <list of punctuation>" << endl;
          cout << "  A list of punctuation to ignore, without spaces." << endl;
          cout << "  The check will check all possible pairs except the ones given here." << endl;
          cout << "  Only one of a pair is needed, the other one is added automatically." << endl;
          return 0;
        }
      }
    }
  }
  // Get list of openers and closers.
  for (gunichar i = 0; i < 1000000; i++) {
    gunichar mirror;
    if (g_unichar_get_mirror_char (i, &mirror)) {
      if (gclosers.find (i) == gclosers.end()) {
        gopeners.insert (i);
        gclosers.insert (mirror);
      }
    }
  }
  // Read data from stdin.
  GIOChannel* io;
  gchar* text;
  gsize length;
  io = g_io_channel_unix_new (0);
  g_io_channel_read_to_end (io, &text, &length, NULL);
  // Set up parser.
  GMarkupParseContext *context;
  GMarkupParser parser = {
    start_element_handler,
    end_element_handler,
    text_handler,
    passthrough_handler,
    error_handler
  };
  // Parse xml data.
  context = g_markup_parse_context_new (&parser, GMarkupParseFlags (0), NULL, NULL);
  g_markup_parse_context_parse (context, text, length, NULL);
  g_markup_parse_context_end_parse (context, NULL);
  // Free some resources.  
  g_markup_parse_context_free (context);
  g_free (text);
  g_io_channel_unref (io);
  // See if there are still any markers to be matched.
  check_pairs_clean ();
  // Ready.
  return 0;
}

Generated by  Doxygen 1.6.0   Back to index