Logo Search packages:      
Sourcecode: bibledit version File versions  Download package

search_utils.cpp

/*
** Copyright (C) 2003-2006 Teus Benschop.
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/


#include "utilities.h"
#include "search_utils.h"
#include "bible.h"
#include "gtkwrappers.h"
#include "references.h"
#include "bibletime.h"
#include <sqlite3.h>
#include "sqlite_reader.h"
#include "gwrappers.h"
#include "gtkwrappers.h"
#include "projectutils.h"
#include "generalconfig.h"
#include "progresswindow.h"
#include "session.h"
#include "categorize.h"


void search_string_basic (const ustring& project, bool use_book_selection, unsigned int chapter, vector<ustring>& results)
/*
Basic search for a string in the text.
project: project to search.
use_book_selection: whether to limit searches to the selected books only.
chapter: currently opened chapter in the editor.
results: will contain the search results.
*/
{
  // Session data.
  Session session (0);
  // Case sensitive
  bool casesensitive = session.search_case_sensitive();
  // The string to search for. 
  // Note any apostrophies need to be doubled for SQLite.
  // We need to normalize the search expression when comparing strings.
  ustring localsearchword (session.searchword());
  localsearchword = localsearchword.normalize ();
  if (!casesensitive) localsearchword = localsearchword.casefold ();
  localsearchword = double_apostrophy (localsearchword);
  // Book and chapter selection.
  bool search_current_chapter = session.search_current_chapter();
  set<ustring> books = session.selected_books();
  // Go through each book in the project.  
  vector<ustring> availablebooks = project_get_books (project);
  for (unsigned int bk = 0; bk < availablebooks.size(); bk++) {
    // If we select books, and the book is not to be searched, skip it.
    if (use_book_selection) {
      if (books.find (availablebooks[bk]) == books.end ()) {
        continue;
      }
    }       
    // Database variables.
    sqlite3 *db;
    int rc;
    char *error = NULL;
    try
    {
      // Connect to database.
      rc = sqlite3_open(project_book_filename (project, availablebooks[bk]).c_str (), &db);
      if (rc) {
        throw runtime_error (sqlite3_errmsg(db));
      }
      sqlite3_busy_timeout (db, 2000);
      // Go through each chapter in the books.
      vector<unsigned int> chapters = project_get_chapters (project, availablebooks[bk]);
      for (unsigned int ch = 0; ch < chapters.size(); ch++) {
        // Do we search this chapter?
        if (search_current_chapter) {
          if (chapter != chapters[ch])
            continue;
        }
        // Open a reader.
        SqliteReader reader (0);
        // Execute the SQL statement, and read the results.
        char * sql;
        sql = g_strdup_printf ("select verse, usfm from '%d';", chapters[ch]);
        rc = sqlite3_exec(db, sql, reader.callback, &reader, &error);
        g_free (sql);
        if (rc != SQLITE_OK) {
          throw runtime_error (error);
        }
        for (unsigned int i = 0; i < reader.ustring0.size(); i++) {
          // Handle casesensitive.
          if (!casesensitive) reader.ustring1[i] = reader.ustring1[i].casefold();
          if (reader.ustring1[i].find (localsearchword) != string::npos) {
            results.push_back (book_chapter_verse_to_reference (availablebooks[bk], chapters[ch], reader.ustring0[i]));
          }
        }
      }
    }
    catch (exception & ex)
    {
      gw_critical (ex.what ());
    }
    // Close connection.  
    sqlite3_close (db);
  }
}


ustring search_in_bibledit_assemble_line (const ustring& input, AreaType areatype,
        bool area_id, bool area_intro, bool area_heading, bool area_chapter,
        bool area_study, bool area_notes, bool area_xref, bool area_verse)
// Builds the text that is going to be searched, out of the selected areas.
// atRaw: the input, just as it is.
// atAll: every category of the input, separated by single spaces.
// atSelection: only the categories whose area_* flag is set; each of them is
// followed by a space, except for the verse text, which comes last.
// Any other area type gives an empty line.
{
  // Raw text is searched as it stands.
  if (areatype == atRaw)
    return input;

  ustring assembled;
  const bool everything = (areatype == atAll);
  if (!everything && (areatype != atSelection))
    return assembled;

  // Searching everything gives the same text as a selection that has all the
  // areas switched on, so both are handled by the same code.
  CategorizeLine categorized (input);
  if (everything || area_id) {
    assembled.append (categorized.id);
    assembled.append (" ");
  }
  if (everything || area_intro) {
    assembled.append (categorized.intro);
    assembled.append (" ");
  }
  if (everything || area_heading) {
    assembled.append (categorized.head);
    assembled.append (" ");
  }
  if (everything || area_chapter) {
    assembled.append (categorized.chap);
    assembled.append (" ");
  }
  if (everything || area_study) {
    assembled.append (categorized.study);
    assembled.append (" ");
  }
  if (everything || area_notes) {
    assembled.append (categorized.note);
    assembled.append (" ");
  }
  if (everything || area_xref) {
    assembled.append (categorized.ref);
    assembled.append (" ");
  }
  if (everything || area_verse)
    assembled.append (categorized.verse);
  return assembled;
}


bool search_in_bibledit_word_boundaries_match (const ustring& text, const ustring& searchword,
                                               bool matchbeginning, bool matchending, bool globbing)
// Does the word boundary matching.
{
  /*
  Deal with matching the start of a word and/or of the end.
  
  There are four cases here.
  1. Match word start only.
  2. Match word end only.
  3. Both match start end end of a word, which implies "whole word".
  4. No matching at all.
  
  Boundary resolution is handled by pango_break(). Textual boundaries such 
  as word boundaries and line boundaries are determined for each item.
  In most cases a general algorithm suffices for this process, but in some
  cases a language module will override the generic algorithm with a more
  specific one.
  It seems to be easier programming to use GtkTextIter and GtkTextBuffer,
  rather than pango_break() directly.
  */
  
  // Whether the word matches.
  bool match = false;

  // Textbuffer for determining word boundaries.
  GtkTextBuffer * textbuffer = gtk_text_buffer_new (NULL);
  gtk_text_buffer_set_text (textbuffer, text.c_str(), -1);

  // Iterators needed.
  GtkTextIter startiter;
  GtkTextIter enditer;
  
  // Store segments of text to compare against.
  vector<ustring> segments;
    
  // Deal with case one: Match word start only.  
  if (matchbeginning && !matchending) {
    // Create a patternword for the glob-style pattern matching.
    ustring patternword = searchword + "*";
    // Collect all strings starting with a word.
    gtk_text_buffer_get_start_iter (textbuffer, &startiter);
    gtk_text_buffer_get_end_iter (textbuffer, &enditer);
    while (gtk_text_iter_forward_word_end (&startiter)) {
      gtk_text_iter_backward_word_start (&startiter);    
      segments.push_back (gtk_text_iter_get_text (&startiter, &enditer));
      gtk_text_iter_forward_word_end (&startiter);
    }
    // See whether the word is in it.
    for (unsigned int i2 = 0; i2 < segments.size(); i2++) {
      if (globbing) {
        // Glob-style pattern matching.
        if (g_pattern_match_simple (patternword.c_str(), segments[i2].c_str())) {
          match = true;
          break;
        }
      } else {
        // Straight compare.
        if (segments[i2].find (searchword) == 0) {
          match = true;
          break;
        }
      }
    }
  }
    
  // Deal with case two: Match word end only.  
  if (!matchbeginning && matchending) {
    // Create a patternword for the glob-style pattern matching.
    ustring patternword = "*" + searchword;
    // Collect all strings ending with a word.
    gtk_text_buffer_get_start_iter (textbuffer, &startiter);
    gtk_text_buffer_get_start_iter (textbuffer, &enditer);
    while (gtk_text_iter_forward_word_end (&enditer)) {
      segments.push_back (gtk_text_iter_get_text (&startiter, &enditer));
    }
    // See whether the word is in it.
    for (unsigned int i2 = 0; i2 < segments.size(); i2++) {
      if (globbing) {
        // Glob-style pattern matching.
        if (g_pattern_match_simple (patternword.c_str(), segments[i2].c_str())) {
          match = true;
          break;
        }
      } else {
        // Straight compare.
        size_t matchposition;
        matchposition = segments[i2].length() - searchword.length();
        // Negative match positions cause a false match. Solve that here.
        matchposition = CLAMP (matchposition, 0, 99999999);
        if (segments[i2].find (searchword) == matchposition) {
          match = true;
          break;
        }
      }
    }
  }
    
  // Deal with case three: Match both word start and end.  
  // Interpreted as "match whole word".
  if (matchbeginning && matchending) {
    // Create a patternword for the glob-style pattern matching.
    ustring patternword = searchword;
    // Collect all whole words.
    gtk_text_buffer_get_start_iter (textbuffer, &enditer);
    while (gtk_text_iter_forward_word_end (&enditer)) {
      startiter = enditer;
      gtk_text_iter_backward_word_start (&startiter);    
      segments.push_back (gtk_text_iter_get_text (&startiter, &enditer));
    }
    // See whether the word is in it.
    for (unsigned int i2 = 0; i2 < segments.size(); i2++) {
      if (globbing) {
        // Glob-style pattern matching.
        if (g_pattern_match_simple (patternword.c_str(), segments[i2].c_str())) {
          match = true;
          break;
        }
      } else {
        // Straight compare.
        if (segments[i2] == searchword) {
          match = true;
          break;
        }
      }
    }
  }
    
  // Case four: Nothing to test, so set found to true.
  if (!matchbeginning && !matchending)
    match = true;
    
  // Free memory.
  g_object_unref (textbuffer);

  // Return whether match.
  return match;
}


vector<ustring> search_in_bibledit ()
// Advanced searching in Bibledit.
// All the search settings are read from the session; the project, book and
// chapter that are opened are read from the general configuration.
// Returns the references of the verses that match.
// Throws a runtime_error if the cancel flag of the progress window is set.
// A database error is reported through gw_critical(), after which the search
// continues with the next book.
{
  // Configuration / session
  GeneralConfiguration genconfig (0);
  Session session (0);
  // Set some variables in memory for higher speed.
  bool casesensitive = session.search_case_sensitive();
  bool search_current_book = session.search_current_book();
  bool search_current_chapter = session.search_current_chapter();
  bool search_globbing = session.search_globbing();
  bool search_start_word_match = session.search_start_word_match();
  bool search_end_word_match = session.search_end_word_match();
  set <ustring> selected_books = session.selected_books ();
  AreaType areatype = session.area_type();
  bool area_id = session.area_id();
  bool area_intro = session.area_intro();
  bool area_heading = session.area_heading();
  bool area_chapter = session.area_chapter();
  bool area_study = session.area_study();
  bool area_notes = session.area_notes();
  bool area_xref = session.area_xref();
  bool area_verse = session.area_verse();

  // Progress information.
  ProgressWindow progresswindow ("Searching", true);

  // The string to search for.
  // We need to normalize the search expression when comparing strings.
  // The word is compared in memory only; it does not go into any SQL statement.
  ustring localsearchword (session.searchword().normalize());
  if (!casesensitive) localsearchword = localsearchword.casefold ();

  // The glob-style pattern is the same for every verse, so it is made only once.
  ustring patternword;
  if (search_globbing) patternword = "*" + localsearchword + "*";

  // Storage for references: search results.
  vector<ustring> results;

  // Get our position in the text.
  ustring project = genconfig.project();
  ustring book = genconfig.book();
  unsigned int chapter = convert_to_int (genconfig.chapter());
  
  // Go through each book in the project. Progress information.
  vector<ustring> availablebooks = project_get_books (project);
  progresswindow.set_iterate (0, 1, availablebooks.size());
  for (unsigned int bk = 0; bk < availablebooks.size(); bk++) {
    progresswindow.iterate ();
    progresswindow.set_text (availablebooks[bk]);
    // NOTE(review): this is thrown outside the try block below, so it leaves
    // this function. search_string() in this file does not catch it - confirm
    // that a caller further up does.
    if (progresswindow.cancel) {
      throw runtime_error ("Search cancelled");
    }
    
    // If the book is not to be searched, skip it.
    if (search_current_book) if (book != availablebooks[bk]) continue;
    if (selected_books.find (availablebooks[bk]) == selected_books.end ()) continue;
      
    // Database variables.
    // The handle starts out as NULL, so that closing it below is harmless
    // even if an exception is thrown before sqlite3_open() has set it.
    sqlite3 *db = NULL;
    int rc;
    char *error = NULL;
    try
    {
      // Connect to database.
      rc = sqlite3_open(project_book_filename (project, availablebooks[bk]).c_str (), &db);
      if (rc) {
        throw runtime_error (sqlite3_errmsg(db));
      }
      sqlite3_busy_timeout (db, 2000);

      // Go through each chapter in the book.
      vector<unsigned int> chapters = project_get_chapters (project, availablebooks[bk]);
      for (unsigned int ch = 0; ch < chapters.size(); ch++) {
        
        // Do we search this chapter?
        if (search_current_chapter) 
          if (chapter != chapters[ch])
            continue;
        
        // Read from database.
        // The table is named after the chapter number, which is unsigned.
        SqliteReader reader (0);
        char * sql;
        sql = g_strdup_printf ("select verse, usfm from '%u';", chapters[ch]);
        rc = sqlite3_exec(db, sql, reader.callback, &reader, &error);
        g_free (sql);
        if (rc != SQLITE_OK) {
          // Copy the message, then release the buffer SQLite allocated for it.
          // Fall back on the connection's message if no buffer was given.
          string message (error ? error : sqlite3_errmsg (db));
          sqlite3_free (error);
          error = NULL;
          throw runtime_error (message);
        }
        
        // Go through the results.
        // ustring0 holds the verse numbers, ustring1 the matching USFM text.
        for (unsigned int i = 0; i < reader.ustring0.size(); i++) {
          
          // Verse number.
          ustring verse = reader.ustring0[i];
         
          // Handle casesensitive and area selection.
          // Assemble text to search through.
          ustring input (reader.ustring1[i]);
          if (!casesensitive) input = input.casefold();
          ustring text = search_in_bibledit_assemble_line (input, areatype, area_id, area_intro, area_heading, area_chapter, area_study, area_notes, area_xref, area_verse);
          
          // Use glob-style pattern matching or straight match.
          if (search_globbing) {
            if (!g_pattern_match_simple (patternword.c_str(), text.c_str()))
              continue;
          } else {
            if (text.find (localsearchword) == string::npos)
              continue;
          }
          
          // Do the word boundary matching.
          if (!search_in_bibledit_word_boundaries_match (text, localsearchword, search_start_word_match, search_end_word_match, search_globbing))
            continue;

          // This verse "passed" all tests: a search result.  
          results.push_back (book_chapter_verse_to_reference (availablebooks[bk], chapters[ch], verse));
        }
      }
    }
    catch (const exception & ex)
    {
      gw_critical (ex.what ());
    }
    // Close connection.  
    sqlite3_close (db);
  }

  // Give the results.
  return results;
}


vector<ustring> search_in_bibletime (BibleTime * bibletime)
/*
Searches in BibleTime for the search word of the session, and gives the hits
back as references of the form "book chapter:verse".
Where in BibleTime to search depends on the search type set in the session.
Hits that cannot be interpreted as a reference are left out.
*/
{
  // Show a progress window, set halfway, while the search runs.
  ProgressWindow progresswindow ("Searching in BibleTime", false);
  progresswindow.set_fraction (0.5);

  // Session data
  Session session (0);

  // The search expression has to be normalized, as prescribed, when comparing strings.
  ustring needle (session.searchword());
  needle = needle.normalize ();

  // Let BibleTime do the actual search.
  vector<ustring> hits;
  switch (session.searchbibletimetype()) {
    case sbttDefaultBible:
      hits = bibletime->search_in_default_bible (needle);
      break;
    case sbttOpenModules:
      hits = bibletime->search_in_open_modules (needle);
      break;
    case sbttBible:
      hits = bibletime->search_in_module (session.search_bibletime_bible(), needle);
      break;
    case sbttCommentary:
      hits = bibletime->search_in_module (session.search_bibletime_commentary(), needle);
      break;
  }

  // Change the hits to our format.
  vector<ustring> converted;
  for (vector<ustring>::iterator hit = hits.begin(); hit != hits.end(); ++hit) {
    // Drop everything up to and including the first "] ", if it is there.
    const size_t marker = hit->find ("] ");
    if (marker != string::npos)
      hit->erase (0, marker + 2);
    // Only keep what can be recognized as a reference.
    ustring newbook, newchapter, newverse;
    if (reference_discover ("", "", "", *hit, newbook, newchapter, newverse))
      converted.push_back (newbook + " " + newchapter + ":" + newverse);
  }

  return converted;
}


void search_load_references (vector<ustring>& searchresults, 
  GtkListStore * liststore, GtkWidget * listview, GtkTreeViewColumn * treeviewcolumn)
/*
Takes the results of a search and puts them in the reference area.
How they combine with the references that are already there depends on the
search results type set in the session:
sstLoad: the results replace what is there.
sstAdd: the results are added to what is there.
sstSubtract: the results are taken out of what is there.
sstShare: only results that are also there already, are kept.
The basic search, which is on page zero, always loads.
Except for sstShare, searchresults itself is changed into what gets loaded.
*/
{
  // Session data.
  Session session (0);

  // The references that are in the editor right now.
  vector<ustring> loaded;
  References references (liststore, listview, treeviewcolumn);
  references.get_loaded ();
  references.get_references (loaded);

  // How the new results interact with the ones in the editor.
  // On page zero we are on basic search, and that always loads the results,
  // regardless of the setting in the dialog.
  SearchResultsType mode = session.searchresultstype();
  if (session.search_page() == 0)
    mode = sstLoad;

  // Each case below only decides which references are to be shown.
  // Sorting and loading them is done once, after the switch.
  vector<ustring> shared;
  vector<ustring> * outcome = NULL;
  switch (mode) {
    case sstLoad:
    {
      outcome = &searchresults;
      break;
    }
    case sstAdd:
    {
      // The set takes care of the duplicates.
      set<string> merged;
      for (vector<ustring>::const_iterator ref = loaded.begin(); ref != loaded.end(); ++ref)
        merged.insert (*ref);
      for (vector<ustring>::const_iterator ref = searchresults.begin(); ref != searchresults.end(); ++ref)
        merged.insert (*ref);
      searchresults.assign (merged.begin(), merged.end());
      outcome = &searchresults;
      break;
    }
    case sstSubtract:
    {
      // Start off with what is in the editor, and remove every search result from it.
      set<string> remaining;
      for (vector<ustring>::const_iterator ref = loaded.begin(); ref != loaded.end(); ++ref)
        remaining.insert (*ref);
      for (vector<ustring>::const_iterator ref = searchresults.begin(); ref != searchresults.end(); ++ref)
        remaining.erase (*ref);
      searchresults.assign (remaining.begin(), remaining.end());
      outcome = &searchresults;
      break;
    }
    case sstShare:
    {
      // Keep only the search results that are in the editor already.
      // All other ones are discarded.
      set<ustring> present (loaded.begin(), loaded.end());
      for (vector<ustring>::const_iterator ref = searchresults.begin(); ref != searchresults.end(); ++ref) {
        if (present.count (*ref) != 0)
          shared.push_back (*ref);
      }
      outcome = &shared;
      break;
    }
  }

  // Sort and load the references, if there was anything to do.
  if (outcome != NULL) {
    sort_references (*outcome);
    references.set_references (*outcome);
    references.fill_store ();
  }
}


void search_string (GtkListStore * liststore, GtkWidget * listview, 
                    GtkTreeViewColumn * treeviewcolumn, BibleTime * bibletime)
// Runs the search that belongs to the search page set in the session:
// page 0 is the basic search, page 1 the advanced search, and page 2 the
// search in BibleTime. The outcome is then handed to search_load_references().
{
  // Storage for what was found.
  vector<ustring> hits;

  // Configuration & session.
  GeneralConfiguration genconfig (0);
  Session session (0);

  const int page = session.search_page();
  if (page == 0) {
    // Basic search.
    // For the duration of the search, the book selection in the session is
    // replaced by the current book only. It is put back afterwards.
    set <ustring> remembered_books = session.selected_books();
    set <ustring> current_book_only;
    current_book_only.insert (genconfig.book());
    session.selected_books (current_book_only);
    unsigned int chapter = convert_to_int (genconfig.chapter());
    search_string_basic (genconfig.project(), session.search_current_book(), chapter, hits);
    session.selected_books (remembered_books);
  } else if (page == 1) {
    // Advanced search.
    hits = search_in_bibledit ();
  } else if (page == 2) {
    // Search in bibletime.
    hits = search_in_bibletime (bibletime);
  }

  // Load the references in the editor.
  search_load_references (hits, liststore, listview, treeviewcolumn);
}

Generated by  Doxygen 1.6.0   Back to index