[dots/remove-assistant: 36/38] Use an alternative pygi poppler-based backend for pdf
- From: Fernando Herrera de las Heras <fherrera src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dots/remove-assistant: 36/38] Use an alternative pygi poppler-based backend for pdf
- Date: Thu, 15 Jul 2010 21:10:00 +0000 (UTC)
commit 32360f4a4f57acab36cd69e240b5f771e7532650
Author: Fernando Herrera <fherrera onirica com>
Date: Thu Jul 15 03:04:34 2010 +0200
Use an alternative pygi poppler-based backend for pdf
TODO | 4 ++-
bin/dotstableeditor | 1 +
dots/Makefile.am | 3 +-
dots/app_window.py | 1 +
dots/document_builder.py | 6 +++-
dots/{pdfdocument.py => pdfdocument_gi.py} | 41 ++++++++++++++-------
dots/{pdfdocument.py => pdfdocument_pypoppler.py} | 30 ++++++++++++---
7 files changed, 63 insertions(+), 23 deletions(-)
---
diff --git a/TODO b/TODO
index fbdf21c..4f38bd1 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,7 @@
- Put cursor at beginning of textviews when they are exposed.
-- Nice error handling when importing odt documents.
+- Nice error handling:
+ when importing odt documents.
+ when pdf has no text
- Edit the text
- .desktop file
- basic packaging
diff --git a/bin/dotstableeditor b/bin/dotstableeditor
index 9164deb..77eb4c0 100755
--- a/bin/dotstableeditor
+++ b/bin/dotstableeditor
@@ -28,6 +28,7 @@
import os
from sys import argv
from dots import host_settings
+import pygtk
import gtk
import re
diff --git a/dots/Makefile.am b/dots/Makefile.am
index e0c9f1b..643d96c 100644
--- a/dots/Makefile.am
+++ b/dots/Makefile.am
@@ -8,7 +8,8 @@ dots_PYTHON = \
host_settings.py \
config_builder.py \
document.py \
- pdfdocument.py \
+ pdfdocument_gi.py \
+ pdfdocument_pypoppler.py \
odtdocument.py \
xmldocument.py \
document_builder.py \
diff --git a/dots/app_window.py b/dots/app_window.py
index d389200..b9bc17b 100644
--- a/dots/app_window.py
+++ b/dots/app_window.py
@@ -17,6 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import pygtk
import gtk, glib
import os, tempfile
from config_builder import ConfigBuilder
diff --git a/dots/document_builder.py b/dots/document_builder.py
index bc4e966..6fb3b32 100644
--- a/dots/document_builder.py
+++ b/dots/document_builder.py
@@ -19,8 +19,12 @@
import mimetypes
from document import Document
from odtdocument import OdtDocument
-from pdfdocument import PdfDocument
from xmldocument import XmlDocument
+try:
+ from pdfdocument_gi import PdfDocument
+except:
+ from pdfdocument_pypoppler import PdfDocument
+ pass
def document_new(filename):
mime_type, encoding = mimetypes.guess_type (filename)
diff --git a/dots/pdfdocument.py b/dots/pdfdocument_gi.py
similarity index 74%
copy from dots/pdfdocument.py
copy to dots/pdfdocument_gi.py
index c030647..7c6d63f 100644
--- a/dots/pdfdocument.py
+++ b/dots/pdfdocument_gi.py
@@ -18,30 +18,43 @@
from document import Document
from translator import Translator
-import poppler
+import pygtk
+pygtk.require('2.0')
+from gi.repository import Poppler
class PdfDocument(Document):
- def translate(self, config):
- # FIXME: Check if poppler gives us always UTF-8 strings
- config['outputFormat']['inputTextEncoding'] = "UTF8"
- self.translator = Translator(config)
- uri = "file://" + self.input_file
- document = poppler.document_new_from_file (uri, None)
+ def _get_text (self, file):
+ uri = "file://" + file
+ document = Poppler.Document.new_from_file (uri, "")
npages = document.get_n_pages()
text = ""
for p in range(0,npages):
page = document.get_page(p)
- w,h = page.get_size()
- r = poppler.Rectangle ()
- r.x1 = 0
- r.x2 = w
- r.y1 = 0
- r.y2 = h
# Currently we are getting the layout from the pdf here
# we should collapse it
- text += page.get_text(poppler.SELECTION_LINE,r)
+ text += page.get_text()
+
+ return text
+
+
+ def translate(self, config):
+ # FIXME: Check if poppler gives us always UTF-8 strings
+ config['outputFormat']['inputTextEncoding'] = "UTF8"
+ self.translator = Translator(config)
+ text = self._get_text(self.input_file)
self.braille_text = self.translator.translate_string (text)
return
+
+
+if __name__ == "__main__":
+ import sys
+
+ if len(sys.argv) > 1:
+ document = PdfDocument(sys.argv[1])
+ print document._get_text(sys.argv[1])
+
+
+ PdfDocument._get_text
diff --git a/dots/pdfdocument.py b/dots/pdfdocument_pypoppler.py
similarity index 80%
rename from dots/pdfdocument.py
rename to dots/pdfdocument_pypoppler.py
index c030647..ab6dbbf 100644
--- a/dots/pdfdocument.py
+++ b/dots/pdfdocument_pypoppler.py
@@ -21,11 +21,8 @@ from translator import Translator
import poppler
class PdfDocument(Document):
- def translate(self, config):
- # FIXME: Check if poppler gives us always UTF-8 strings
- config['outputFormat']['inputTextEncoding'] = "UTF8"
- self.translator = Translator(config)
- uri = "file://" + self.input_file
+ def _get_text (self, file):
+ uri = "file://" + file
document = poppler.document_new_from_file (uri, None)
npages = document.get_n_pages()
text = ""
@@ -39,9 +36,30 @@ class PdfDocument(Document):
r.y2 = h
# Currently we are getting the layout from the pdf here
# we should collapse it
- text += page.get_text(poppler.SELECTION_LINE,r)
+ text += page.get_text(poppler.SELECTION_GLYPH,r)
+ print text
+
+ return text
+
+
+ def translate(self, config):
+ # FIXME: Check if poppler gives us always UTF-8 strings
+ config['outputFormat']['inputTextEncoding'] = "UTF8"
+ self.translator = Translator(config)
+ text = self._get_text(config.input_file)
self.braille_text = self.translator.translate_string (text)
return
+
+
+if __name__ == "__main__":
+ import sys
+
+ if len(sys.argv) > 1:
+ document = PdfDocument(sys.argv[1])
+ print document._get_text(sys.argv[1])
+
+
+ PdfDocument._get_text
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]