/*
 * Implements search and copy functionality for Djvu files.
 * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#include <string.h>

#include <libdjvu/miniexp.h>
#include "djvu-text-page.h"
28 * djvu_text_page_selection_process:
29 * @page: #DjvuTextPage instance
30 * @p: s-expression to append
31 * @delimit: character/word/... delimiter
33 * Appends the string in @p to the page text.
35 * Returns: whether the end was not reached in this s-expression
38 djvu_text_page_selection_process (DjvuTextPage *page,
42 if (page->text || p == page->start) {
43 char *token_text = (char *) miniexp_to_str (miniexp_nth (5, p));
46 g_strjoin (delimit & 2 ? "\n" :
47 delimit & 1 ? " " : NULL,
48 page->text, token_text,
51 page->text = new_text;
53 page->text = g_strdup (token_text);
61 * djvu_text_page_selection:
62 * @page: #DjvuTextPage instance
64 * @delimit: character/word/... delimiter
66 * Walks the tree in @p and appends the text with
67 * djvu_text_page_selection_process() for all s-expressions
68 * between the start and end fields.
70 * Returns: whether the end was not reached in this subtree
73 djvu_text_page_selection (DjvuTextPage *page,
77 g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
78 (miniexp_car (p)), FALSE);
80 if (miniexp_car (p) != page->char_symbol)
81 delimit |= miniexp_car (p) == page->word_symbol ? 1 : 2;
83 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
84 while (deeper != miniexp_nil) {
85 miniexp_t str = miniexp_car (deeper);
86 if (miniexp_stringp (str)) {
87 if (!djvu_text_page_selection_process
91 if (!djvu_text_page_selection
96 deeper = miniexp_cdr (deeper);
102 djvu_text_page_limits_process (DjvuTextPage *page,
108 current.x1 = miniexp_to_int (miniexp_nth (1, p));
109 current.y1 = miniexp_to_int (miniexp_nth (2, p));
110 current.x2 = miniexp_to_int (miniexp_nth (3, p));
111 current.y2 = miniexp_to_int (miniexp_nth (4, p));
112 if (current.x2 >= rect->x1 && current.y1 <= rect->y2 &&
113 current.x1 <= rect->x2 && current.y2 >= rect->y1) {
114 if (page->start == miniexp_nil)
122 djvu_text_page_limits (DjvuTextPage *page,
126 g_return_if_fail (miniexp_consp (p) &&
127 miniexp_symbolp (miniexp_car (p)));
129 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
130 while (deeper != miniexp_nil) {
131 miniexp_t str = miniexp_car (deeper);
132 if (miniexp_stringp (str))
133 djvu_text_page_limits_process (page, p, rect);
135 djvu_text_page_limits (page, str, rect);
137 deeper = miniexp_cdr (deeper);
142 djvu_text_page_copy (DjvuTextPage *page,
143 EvRectangle *rectangle)
147 page->start = miniexp_nil;
148 page->end = miniexp_nil;
149 djvu_text_page_limits (page, page->text_structure, rectangle);
150 djvu_text_page_selection (page, page->text_structure, 0);
152 /* Do not free the string */
160 * djvu_text_page_position:
161 * @page: #DjvuTextPage instance
162 * @position: index in the page text
164 * Returns the closest s-expression that contains the given position in
167 * Returns: closest s-expression
170 djvu_text_page_position (DjvuTextPage *page,
173 GArray *links = page->links;
175 int hi = links->len - 1;
178 g_return_val_if_fail (hi >= 0, miniexp_nil);
180 /* Shamelessly copied from GNU classpath */
182 mid = (low + hi) >> 1;
184 &g_array_index (links, DjvuTextLink, mid);
185 if (link->position == position)
187 else if (link->position > position)
193 return g_array_index (page->links, DjvuTextLink, mid).pair;
197 * djvu_text_page_union:
198 * @target: first rectangle and result
199 * @source: second rectangle
201 * Calculates the bounding box of two rectangles and stores the reuslt
205 djvu_text_page_union (EvRectangle *target,
208 if (source->x1 < target->x1)
209 target->x1 = source->x1;
210 if (source->x2 > target->x2)
211 target->x2 = source->x2;
212 if (source->y1 < target->y1)
213 target->y1 = source->y1;
214 if (source->y2 > target->y2)
215 target->y2 = source->y2;
219 * djvu_text_page_sexpr_process:
220 * @page: #DjvuTextPage instance
221 * @p: s-expression to append
222 * @start: first s-expression in the selection
223 * @end: last s-expression in the selection
225 * Appends the rectangle defined by @p to the internal bounding box rectangle.
227 * Returns: whether the end was not reached in this s-expression
230 djvu_text_page_sexpr_process (DjvuTextPage *page,
235 if (page->bounding_box || p == start) {
236 EvRectangle *new_rectangle = g_new (EvRectangle, 1);
237 new_rectangle->x1 = miniexp_to_int (miniexp_nth (1, p));
238 new_rectangle->y1 = miniexp_to_int (miniexp_nth (2, p));
239 new_rectangle->x2 = miniexp_to_int (miniexp_nth (3, p));
240 new_rectangle->y2 = miniexp_to_int (miniexp_nth (4, p));
241 if (page->bounding_box) {
242 djvu_text_page_union (page->bounding_box,
244 g_free (new_rectangle);
246 page->bounding_box = new_rectangle;
254 * djvu_text_page_sexpr:
255 * @page: #DjvuTextPage instance
257 * @start: first s-expression in the selection
258 * @end: last s-expression in the selection
260 * Walks the tree in @p and extends the rectangle with
261 * djvu_text_page_process() for all s-expressions between @start and @end.
263 * Returns: whether the end was not reached in this subtree
266 djvu_text_page_sexpr (DjvuTextPage *page,
271 g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
272 (miniexp_car (p)), FALSE);
274 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
275 while (deeper != miniexp_nil) {
276 miniexp_t str = miniexp_car (deeper);
277 if (miniexp_stringp (str)) {
278 if (!djvu_text_page_sexpr_process
279 (page, p, start, end))
282 if (!djvu_text_page_sexpr
283 (page, str, start, end))
286 deeper = miniexp_cdr (deeper);
292 * djvu_text_page_box:
293 * @page: #DjvuTextPage instance
294 * @start: first s-expression in the selection
295 * @end: last s-expression in the selection
297 * Builds a rectangle that contains all s-expressions in the given range.
300 djvu_text_page_box (DjvuTextPage *page,
304 page->bounding_box = NULL;
305 djvu_text_page_sexpr (page, page->text_structure, start, end);
306 return page->bounding_box;
310 * djvu_text_page_append_search:
311 * @page: #DjvuTextPage instance
313 * @case_sensitive: do not ignore case
314 * @delimit: insert spaces because of higher (sentence/paragraph/...) break
316 * Appends the tree in @p to the internal text string.
319 djvu_text_page_append_text (DjvuTextPage *page,
321 gboolean case_sensitive,
326 g_return_if_fail (miniexp_consp (p) &&
327 miniexp_symbolp (miniexp_car (p)));
329 delimit |= page->char_symbol != miniexp_car (p);
331 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
332 while (deeper != miniexp_nil) {
333 miniexp_t data = miniexp_car (deeper);
334 if (miniexp_stringp (data)) {
336 link.position = page->text == NULL ? 0 :
339 g_array_append_val (page->links, link);
341 token_text = (char *) miniexp_to_str (data);
343 token_text = g_utf8_casefold (token_text, -1);
344 if (page->text == NULL)
345 page->text = g_strdup (token_text);
348 g_strjoin (delimit ? " " : NULL,
349 page->text, token_text,
352 page->text = new_text;
357 djvu_text_page_append_text (page, data,
358 case_sensitive, delimit);
360 deeper = miniexp_cdr (deeper);
365 * djvu_text_page_search:
366 * @page: #DjvuTextPage instance
367 * @text: text to search
369 * Searches the page for the given text. The results list has to be
370 * externally freed afterwards.
373 djvu_text_page_search (DjvuTextPage *page,
376 char *haystack = page->text;
379 if (page->links->len == 0)
382 search_len = strlen (text);
383 while ((haystack = strstr (haystack, text)) != NULL) {
384 int start_p = haystack - page->text;
385 miniexp_t start = djvu_text_page_position (page, start_p);
386 int end_p = start_p + search_len - 1;
387 miniexp_t end = djvu_text_page_position (page, end_p);
388 result = djvu_text_page_box (page, start, end);
390 page->results = g_list_prepend (page->results, result);
391 haystack = haystack + search_len;
393 page->results = g_list_reverse (page->results);
398 * djvu_text_page_prepare_search:
399 * @page: #DjvuTextPage instance
400 * @case_sensitive: do not ignore case
402 * Indexes the page text and prepares the page for subsequent searches.
405 djvu_text_page_prepare_search (DjvuTextPage *page,
406 gboolean case_sensitive)
408 djvu_text_page_append_text (page, page->text_structure,
409 case_sensitive, FALSE);
413 * djvu_text_page_new:
414 * @text: S-expression of the page text
416 * Creates a new page to search.
418 * Returns: new #DjvuTextPage instance
421 djvu_text_page_new (miniexp_t text)
425 page = g_new0 (DjvuTextPage, 1);
426 page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink));
427 page->char_symbol = miniexp_symbol ("char");
428 page->word_symbol = miniexp_symbol ("word");
429 page->text_structure = text;
434 * djvu_text_page_free:
435 * @page: #DjvuTextPage instance
437 * Frees the given #DjvuTextPage instance.
440 djvu_text_page_free (DjvuTextPage *page)
443 g_array_free (page->links, TRUE);