/*
 * Implements search and copy functionality for Djvu files.
 * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
23 #include <libdjvu/miniexp.h>
24 #include "djvu-text-page.h"
28 * djvu_text_page_selection_process:
29 * @page: #DjvuTextPage instance
30 * @p: s-expression to append
31 * @delimit: character/word/... delimiter
33 * Appends the string in @p to the page text.
35 * Returns: whether the end was not reached in this s-expression
38 djvu_text_page_selection_process (DjvuTextPage *page,
42 if (page->text || p == page->start) {
43 char *token_text = (char *) miniexp_to_str (miniexp_nth (5, p));
46 g_strjoin (delimit & 2 ? "\n" :
47 delimit & 1 ? " " : NULL,
48 page->text, token_text,
51 page->text = new_text;
53 page->text = g_strdup (token_text);
61 * djvu_text_page_selection:
62 * @page: #DjvuTextPage instance
64 * @delimit: character/word/... delimiter
66 * Walks the tree in @p and appends the text with
67 * djvu_text_page_selection_process() for all s-expressions
68 * between the start and end fields.
70 * Returns: whether the end was not reached in this subtree
73 djvu_text_page_selection (DjvuTextPage *page,
77 g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
78 (miniexp_car (p)), FALSE);
80 if (miniexp_car (p) != page->char_symbol)
81 delimit |= miniexp_car (p) == page->word_symbol ? 1 : 2;
83 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
84 while (deeper != miniexp_nil) {
85 miniexp_t str = miniexp_car (deeper);
86 if (miniexp_stringp (str)) {
87 if (!djvu_text_page_selection_process
91 if (!djvu_text_page_selection
96 deeper = miniexp_cdr (deeper);
102 djvu_text_page_limits_process (DjvuTextPage *page,
108 current.x1 = miniexp_to_int (miniexp_nth (1, p));
109 current.y1 = miniexp_to_int (miniexp_nth (2, p));
110 current.x2 = miniexp_to_int (miniexp_nth (3, p));
111 current.y2 = miniexp_to_int (miniexp_nth (4, p));
112 if (current.x2 >= rect->x1 && current.y1 <= rect->y2 &&
113 current.x1 <= rect->x2 && current.y2 >= rect->y1) {
114 if (page->start == miniexp_nil)
122 djvu_text_page_limits (DjvuTextPage *page,
128 g_return_if_fail (miniexp_consp (p) &&
129 miniexp_symbolp (miniexp_car (p)));
131 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
132 while (deeper != miniexp_nil) {
133 miniexp_t str = miniexp_car (deeper);
134 if (miniexp_stringp (str))
135 djvu_text_page_limits_process (page, p, rect);
137 djvu_text_page_limits (page, str, rect);
139 deeper = miniexp_cdr (deeper);
144 djvu_text_page_copy (DjvuTextPage *page,
145 EvRectangle *rectangle)
149 page->start = miniexp_nil;
150 page->end = miniexp_nil;
151 djvu_text_page_limits (page, page->text_structure, rectangle);
152 djvu_text_page_selection (page, page->text_structure, 0);
154 /* Do not free the string */
162 * djvu_text_page_position:
163 * @page: #DjvuTextPage instance
164 * @position: index in the page text
166 * Returns the closest s-expression that contains the given position in
169 * Returns: closest s-expression
172 djvu_text_page_position (DjvuTextPage *page,
175 GArray *links = page->links;
177 int hi = links->len - 1;
180 g_return_val_if_fail (hi >= 0, miniexp_nil);
182 /* Shamelessly copied from GNU classpath */
184 mid = (low + hi) >> 1;
186 &g_array_index (links, DjvuTextLink, mid);
187 if (link->position == position)
189 else if (link->position > position)
195 return g_array_index (page->links, DjvuTextLink, mid).pair;
199 * djvu_text_page_union:
200 * @target: first rectangle and result
201 * @source: second rectangle
203 * Calculates the bounding box of two rectangles and stores the reuslt
207 djvu_text_page_union (EvRectangle *target,
210 if (source->x1 < target->x1)
211 target->x1 = source->x1;
212 if (source->x2 > target->x2)
213 target->x2 = source->x2;
214 if (source->y1 < target->y1)
215 target->y1 = source->y1;
216 if (source->y2 > target->y2)
217 target->y2 = source->y2;
221 * djvu_text_page_sexpr_process:
222 * @page: #DjvuTextPage instance
223 * @p: s-expression to append
224 * @start: first s-expression in the selection
225 * @end: last s-expression in the selection
227 * Appends the rectangle defined by @p to the internal bounding box rectangle.
229 * Returns: whether the end was not reached in this s-expression
232 djvu_text_page_sexpr_process (DjvuTextPage *page,
237 if (page->bounding_box || p == start) {
238 EvRectangle *new_rectangle = g_new (EvRectangle, 1);
239 new_rectangle->x1 = miniexp_to_int (miniexp_nth (1, p));
240 new_rectangle->y1 = miniexp_to_int (miniexp_nth (2, p));
241 new_rectangle->x2 = miniexp_to_int (miniexp_nth (3, p));
242 new_rectangle->y2 = miniexp_to_int (miniexp_nth (4, p));
243 if (page->bounding_box) {
244 djvu_text_page_union (page->bounding_box,
246 g_free (new_rectangle);
248 page->bounding_box = new_rectangle;
256 * djvu_text_page_sexpr:
257 * @page: #DjvuTextPage instance
259 * @start: first s-expression in the selection
260 * @end: last s-expression in the selection
262 * Walks the tree in @p and extends the rectangle with
263 * djvu_text_page_process() for all s-expressions between @start and @end.
265 * Returns: whether the end was not reached in this subtree
268 djvu_text_page_sexpr (DjvuTextPage *page,
273 g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
274 (miniexp_car (p)), FALSE);
276 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
277 while (deeper != miniexp_nil) {
278 miniexp_t str = miniexp_car (deeper);
279 if (miniexp_stringp (str)) {
280 if (!djvu_text_page_sexpr_process
281 (page, p, start, end))
284 if (!djvu_text_page_sexpr
285 (page, str, start, end))
288 deeper = miniexp_cdr (deeper);
294 * djvu_text_page_box:
295 * @page: #DjvuTextPage instance
296 * @start: first s-expression in the selection
297 * @end: last s-expression in the selection
299 * Builds a rectangle that contains all s-expressions in the given range.
302 djvu_text_page_box (DjvuTextPage *page,
306 page->bounding_box = NULL;
307 djvu_text_page_sexpr (page, page->text_structure, start, end);
308 return page->bounding_box;
312 * djvu_text_page_append_search:
313 * @page: #DjvuTextPage instance
315 * @case_sensitive: do not ignore case
316 * @delimit: insert spaces because of higher (sentence/paragraph/...) break
318 * Appends the tree in @p to the internal text string.
321 djvu_text_page_append_text (DjvuTextPage *page,
323 gboolean case_sensitive,
328 g_return_if_fail (miniexp_consp (p) &&
329 miniexp_symbolp (miniexp_car (p)));
331 delimit |= page->char_symbol != miniexp_car (p);
333 miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
334 while (deeper != miniexp_nil) {
335 miniexp_t data = miniexp_car (deeper);
336 if (miniexp_stringp (data)) {
338 link.position = page->text == NULL ? 0 :
341 g_array_append_val (page->links, link);
343 token_text = (char *) miniexp_to_str (data);
345 token_text = g_utf8_casefold (token_text, -1);
346 if (page->text == NULL)
347 page->text = g_strdup (token_text);
350 g_strjoin (delimit ? " " : NULL,
351 page->text, token_text,
354 page->text = new_text;
359 djvu_text_page_append_text (page, data,
360 case_sensitive, delimit);
362 deeper = miniexp_cdr (deeper);
367 * djvu_text_page_search:
368 * @page: #DjvuTextPage instance
369 * @text: text to search
371 * Searches the page for the given text. The results list has to be
372 * externally freed afterwards.
375 djvu_text_page_search (DjvuTextPage *page,
378 char *haystack = page->text;
381 if (page->links->len == 0)
384 search_len = strlen (text);
385 while ((haystack = strstr (haystack, text)) != NULL) {
386 int start_p = haystack - page->text;
387 miniexp_t start = djvu_text_page_position (page, start_p);
388 int end_p = start_p + search_len - 1;
389 miniexp_t end = djvu_text_page_position (page, end_p);
390 result = djvu_text_page_box (page, start, end);
392 page->results = g_list_prepend (page->results, result);
393 haystack = haystack + search_len;
395 page->results = g_list_reverse (page->results);
400 * djvu_text_page_prepare_search:
401 * @page: #DjvuTextPage instance
402 * @case_sensitive: do not ignore case
404 * Indexes the page text and prepares the page for subsequent searches.
407 djvu_text_page_prepare_search (DjvuTextPage *page,
408 gboolean case_sensitive)
410 djvu_text_page_append_text (page, page->text_structure,
411 case_sensitive, FALSE);
415 * djvu_text_page_new:
416 * @text: S-expression of the page text
418 * Creates a new page to search.
420 * Returns: new #DjvuTextPage instance
423 djvu_text_page_new (miniexp_t text)
427 page = g_new0 (DjvuTextPage, 1);
428 page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink));
429 page->char_symbol = miniexp_symbol ("char");
430 page->word_symbol = miniexp_symbol ("word");
431 page->text_structure = text;
436 * djvu_text_page_free:
437 * @page: #DjvuTextPage instance
439 * Frees the given #DjvuTextPage instance.
442 djvu_text_page_free (DjvuTextPage *page)
445 g_array_free (page->links, TRUE);