ooo-build r14990 - in trunk: . scratch/sd-pptutil/src
- From: thorstenb svn gnome org
- To: svn-commits-list gnome org
- Subject: ooo-build r14990 - in trunk: . scratch/sd-pptutil/src
- Date: Sat, 3 Jan 2009 01:28:16 +0000 (UTC)
Author: thorstenb
Date: Sat Jan 3 01:28:16 2009
New Revision: 14990
URL: http://svn.gnome.org/viewvc/ooo-build?rev=14990&view=rev
Log:
* scratch/sd-pptutil/*: more work on text style parsers, lots of
small improvements across the board
Modified:
trunk/ChangeLog
trunk/scratch/sd-pptutil/src/record.py
trunk/scratch/sd-pptutil/src/stream.py
Modified: trunk/scratch/sd-pptutil/src/record.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/record.py (original)
+++ trunk/scratch/sd-pptutil/src/record.py Sat Jan 3 01:28:16 2009
@@ -7,12 +7,14 @@
class BaseRecordHandler(object):
- def __init__ (self, recordType, recordInstance, size, bytes, prefix=''):
+ def __init__ (self, recordType, recordInstance, size, bytes, streamProperties, prefix='', propertyName=None):
self.recordType = recordType
self.recordInstance = recordInstance
self.size = size
self.bytes = bytes
self.lines = []
+ self.streamProperties = streamProperties
+ self.propertyName = propertyName
self.prefix = prefix
self.pos = 0 # current byte position
@@ -41,6 +43,13 @@
text = "%s: %s"%(name, self.getYesNo(value))
self.appendLine(text)
+ def appendProperty (self, value):
+ if self.propertyName is not None:
+ self.streamProperties[self.propertyName] = value
+
+ def isEmpty (self):
+ return len(self.bytes) <= self.pos
+
def readBytes (self, length):
r = self.bytes[self.pos:self.pos+length]
self.pos += length
@@ -88,15 +97,25 @@
def parseBytes (self):
name = globals.getTextBytes(self.readRemainingBytes())
+ self.appendProperty(name)
self.appendLine("text: '%s'"%name)
+def ShapeString (*args):
+ args += "ShapeText",
+ return String(*args)
+
class UniString(BaseRecordHandler):
"""Textual content."""
def parseBytes (self):
name = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes()))
+ self.appendProperty(name)
self.appendLine("text: '%s'"%name)
+def ShapeUniString (*args):
+ args += "ShapeText",
+ return UniString(*args)
+
# -------------------------------------------------------------------
# special record handler: properties
@@ -106,7 +125,7 @@
def parseBytes (self):
# each prop entry takes 6 bytes; complex stuff comes after
# prop entries and fills remaining record space
- complexBytes = self.bytes[self.pos+self.recordInstance*6:]
+ allComplexBytes = self.bytes[self.pos+self.recordInstance*6:]
# recordInstance gives number of properties
for i in xrange(0, self.recordInstance):
@@ -116,47 +135,97 @@
isComplex = (propType & 0x8000) != 0
isBlip = ((propType & 0x4000) != 0) and not isComplex
propType = (propType & 0x3FFF)
+ complexBytes = []
+ if isComplex:
+ # eat propValue bytes from complexBytes
+ complexBytes = allComplexBytes[:propValue]
+ allComplexBytes = allComplexBytes[propValue:]
+
if propData.has_key(propType):
- handler = propData[propType][1](propType, propValue, isComplex, isBlip, self.appendLine)
+ handler = propData[propType][1](propType, propValue, isComplex, isBlip, complexBytes, self.appendLine)
handler.output()
else:
self.appendLine("%4.4Xh: [unknown property type: %4.4Xh, value: %8.8Xh, complex: %d, blip: %d]"%(propType, propValue, isComplex, isBlip))
# -------------------------------------------------------------------
+# special record handler: document atom
+
+class DocAtom(BaseRecordHandler):
+ """Document atom."""
+
+ def parseBytes (self):
+ slideWidth = self.readSignedInt(4)
+ slideHeight = self.readSignedInt(4)
+ notesWidth = self.readSignedInt(4)
+ notesHeight = self.readSignedInt(4)
+ oleWidth = self.readSignedInt(4)
+ oleHeight = self.readSignedInt(4)
+ notesMasterPersist = self.readUnsignedInt(4)
+ handoutMasterPersist = self.readUnsignedInt(4)
+ firstSlideNum = self.readUnsignedInt(2)
+ slideSizeType = self.readSignedInt(2)
+ savedWithFont = self.readUnsignedInt(1)
+ omitTitlePlace = self.readUnsignedInt(1)
+ right2Left = self.readUnsignedInt(1)
+ showComments = self.readUnsignedInt(1)
+
+ self.appendLine("Slide: (%d,%d), notes: (%d,%d), ole zoom: (%d,%d)"%(slideWidth, slideHeight,
+ notesWidth, notesHeight,
+ oleWidth, oleHeight))
+ self.appendLine("Notes master persist offset: %8.8Xh"%notesMasterPersist)
+ self.appendLine("Handout master persist offset: %8.8Xh"%handoutMasterPersist)
+ self.appendLine("1st slide num: %d, slide size type: %4.4Xh"%(firstSlideNum, slideSizeType))
+ self.appendLine("embedded fonts: %s, no placeholders on title slide: %s"%(savedWithFont,
+ omitTitlePlace))
+ self.appendLine("RTL doc: %s, show comment shapes: %s"%(right2Left, showComments))
+
+
+# -------------------------------------------------------------------
# special record handlers: text style properties
class TextStyles(BaseRecordHandler):
"""Text style properties."""
def parseBytes (self):
- # 4 bytes: total len of para attribs
+ # any shape text set? if not, no chance to calc run lengths
+ if not self.streamProperties.has_key("ShapeText"):
+ self.appendLine("no shape text given, skipping props")
+ return
+
+ textLen = len(self.streamProperties["ShapeText"])
+
+ # 4 bytes: <count> characters of shape text this para run is meant for
# <para attribs>
- # 4 bytes: total len of char attribs
- # <char attribs>
- paraAttribLen = self.readUnsignedInt(4)
- paraAttribEndPos = self.pos + paraAttribLen
- while self.pos < paraAttribEndPos:
- self.parseParaStyle()
+ # repeat until all shape text is consumed
+ charPos = 0
+ while not self.isEmpty() and charPos < textLen:
+ runLen = self.readUnsignedInt(4)
+ charPos += runLen
+ self.parseParaStyle(runLen)
self.appendLine("-"*61)
-
- charAttribLen = self.readUnsignedInt(4)
- charAttribEndPos = self.pos + charAttribLen
- while self.pos < charAttribEndPos:
- self.parseCharStyle()
+
+ # 4 bytes: <count> characters of shape text this char run is meant for
+ # <char attribs>
+ # repeat until all shape text is consumed
+ charPos = 0
+ while not self.isEmpty() and charPos < textLen:
+ runLen = self.readUnsignedInt(4)
+ charPos += runLen
+ self.parseCharStyle(runLen)
self.appendLine("-"*61)
-
+
def appendParaProp (self, text):
self.appendLine("para prop given: "+text)
def appendCharProp (self, text):
self.appendLine("char prop given: "+text)
- def parseParaStyle (self):
+ def parseParaStyle (self, runLen):
indentLevel = self.readUnsignedInt(2)
styleMask = self.readUnsignedInt(4)
- self.appendLine("para props for indent: %d"%indentLevel)
+ self.appendLine("para props for %d chars, indent: %d"%(runLen,indentLevel))
if styleMask & 0x000F:
bulletFlags = self.readUnsignedInt(2)
@@ -177,7 +246,7 @@
self.appendParaProp("bullet size %d"%bulletSize)
if styleMask & 0x0020:
- bulletColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, self.appendParaProp)
+ bulletColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, [], self.appendParaProp)
bulletColorAtom.output()
self.appendParaProp("bullet color atom")
@@ -234,9 +303,11 @@
paraTextDirection = self.readUnsignedInt(2)
self.appendParaProp("para text direction %4.4Xh"%paraTextDirection)
- def parseCharStyle (self):
+ def parseCharStyle (self, runLen):
styleMask = self.readUnsignedInt(4)
+ self.appendLine("char props for %d chars"%runLen)
+
if styleMask & 0xFFFF:
charFlags = self.readUnsignedInt(2)
self.appendCharProp("char flags %4.4Xh"%charFlags)
@@ -262,7 +333,7 @@
self.appendCharProp("char font size %d"%fontSize)
if styleMask & 0x40000:
- charColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, self.appendCharProp)
+ charColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, [], self.appendCharProp)
charColorAtom.output()
self.appendCharProp("char color atom")
@@ -277,17 +348,21 @@
class BasePropertyHandler():
"""Base property handler."""
- def __init__ (self, propType, propValue, isComplex, isBlip, printer):
+ def __init__ (self, propType, propValue, isComplex, isBlip, complexBytes, printer):
self.propType = propType
self.propValue = propValue
self.isComplex = isComplex
self.isBlip = isBlip
+ self.bytes = complexBytes
+ self.pos = 0
self.printer = printer
+ if propData.has_key(self.propType):
+ self.propEntry = propData[self.propType]
def output (self):
if propData.has_key(self.propType):
- propEntry = propData[self.propType]
- self.printer("%4.4Xh: %s = %8.8Xh [\"%s\" - default handler]"%(self.propType, propEntry[0], self.propValue, propEntry[2]))
+ self.printer("%4.4Xh: %s = %8.8Xh [\"%s\" - default handler]"%(self.propType, self.propEntry[0],
+ self.propValue, self.propEntry[2]))
class BoolPropertyHandler(BasePropertyHandler):
"""Bool properties."""
@@ -309,11 +384,39 @@
class MsoArrayPropertyHandler(BasePropertyHandler):
"""MsoArray property."""
+ def readBytes (self, length):
+ r = self.bytes[self.pos:self.pos+length]
+ self.pos += length
+ return r
+
+ def readUnsignedInt (self, length):
+ bytes = self.readBytes(length)
+ return globals.getUnsignedInt(bytes)
+
+ def output (self):
+ if self.isComplex:
+ numElements = self.readUnsignedInt(2)
+ dummy = self.readUnsignedInt(2)
+ elementSize = self.readUnsignedInt(2)
+ self.printer("%4.4Xh: %s: [\"%s\"]"%(self.propType, self.propEntry[0], self.propEntry[2]))
+ for i in xrange(0, numElements):
+ currElem = self.readUnsignedInt(elementSize)
+ self.printer("%4.4Xh: %d = %Xh"%(i,currElem))
+
class UniCharPropertyHandler(BasePropertyHandler):
"""unicode string property."""
+ def output (self):
+ if self.isComplex:
+ name = globals.getUTF8FromUTF16(globals.getTextBytes(self.bytes))
+ self.printer("%4.4Xh: %s = %s: [\"%s\"]"%(self.propType, self.propEntry[0], name, self.propEntry[2]))
+
class FixedPointHandler(BasePropertyHandler):
"""FixedPoint property."""
+
+ def output (self):
+ value = self.propValue / 65536.0
+ self.printer("%4.4Xh: %s = %f [\"%s\"]"%(self.propType, self.propEntry[0], value, self.propEntry[2]))
class ColorPropertyHandler(BasePropertyHandler):
"""Color property."""
@@ -339,8 +442,19 @@
class CharPropertyHandler(BasePropertyHandler):
"""string property."""
+ def output (self):
+ if self.isComplex:
+ name = globals.getTextBytes(self.bytes)
+ self.printer("%4.4Xh: %s = %s: [\"%s\"]"%(self.propType, self.propEntry[0], name, self.propEntry[2]))
+
class HandlesPropertyHandler(BasePropertyHandler):
- """string property."""
+ """handles property."""
+
+class ZipStoragePropertyHandler(BasePropertyHandler):
+ """zip storage."""
+
+ def output (self):
+ self.printer("zipped stuff")
# -------------------------------------------------------------------
# special record handler: properties
@@ -638,7 +752,7 @@
904: ["DFF_Prop_lidRegroup", LongPropertyHandler, "Regroup ID"],
927: ["DFF_Prop_tableProperties", LongPropertyHandler, ""],
928: ["DFF_Prop_tableRowProperties", LongPropertyHandler, ""],
- 937: ["DFF_Prop_xmlstuff", LongPropertyHandler, "Embedded ooxml"],
+ 937: ["DFF_Prop_xmlstuff", ZipStoragePropertyHandler, "Embedded ooxml"],
953: ["DFF_Prop_fEditedWrap", BoolPropertyHandler, "Has the wrap polygon been edited?"],
954: ["DFF_Prop_fBehindDocument", BoolPropertyHandler, "Word-only (shape is behind text)"],
955: ["DFF_Prop_fOnDblClickNotify", BoolPropertyHandler, "Notify client on a double click"],
Modified: trunk/scratch/sd-pptutil/src/stream.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/stream.py (original)
+++ trunk/scratch/sd-pptutil/src/stream.py Sat Jan 3 01:28:16 2009
@@ -18,7 +18,7 @@
7: ["DFF_PST_ClientSignal2"],
10: ["DFF_PST_PowerPointStateInfoAtom"],
1000: ["DFF_PST_Document"],
- 1001: ["DFF_PST_DocumentAtom"],
+ 1001: ["DFF_PST_DocumentAtom", record.DocAtom],
1002: ["DFF_PST_EndDocument"],
1003: ["DFF_PST_SlidePersist"],
1004: ["DFF_PST_SlideBase"],
@@ -84,22 +84,22 @@
3035: ["DFF_PST_OEShapeAtom"],
3998: ["DFF_PST_OutlineTextRefAtom"],
3999: ["DFF_PST_TextHeaderAtom"],
- 4000: ["DFF_PST_TextCharsAtom", record.UniString],
+ 4000: ["DFF_PST_TextCharsAtom", record.ShapeUniString],
4001: ["DFF_PST_StyleTextPropAtom", record.TextStyles],
- 4002: ["DFF_PST_BaseTextPropAtom"],
+ 4002: ["DFF_PST_BaseTextPropAtom", record.TextStyles],
4003: ["DFF_PST_TxMasterStyleAtom"],
4004: ["DFF_PST_TxCFStyleAtom"],
4005: ["DFF_PST_TxPFStyleAtom"],
4006: ["DFF_PST_TextRulerAtom"],
4007: ["DFF_PST_TextBookmarkAtom"],
- 4008: ["DFF_PST_TextBytesAtom", record.String],
+ 4008: ["DFF_PST_TextBytesAtom", record.ShapeString],
4009: ["DFF_PST_TxSIStyleAtom"],
4010: ["DFF_PST_TextSpecInfoAtom"],
4011: ["DFF_PST_DefaultRulerAtom"],
4023: ["DFF_PST_FontEntityAtom"],
4024: ["DFF_PST_FontEmbedData"],
4025: ["DFF_PST_TypeFace"],
- 4026: ["DFF_PST_CString"],
+ 4026: ["DFF_PST_CString", record.UniString],
4027: ["DFF_PST_ExternalObject"],
4033: ["DFF_PST_MetaFile"],
4034: ["DFF_PST_ExOleObj"],
@@ -202,7 +202,7 @@
0xF011: ["DFF_msofbtClientData"],
0xF11F: ["DFF_msofbtOleObject"],
0xF11D: ["DFF_msofbtDeletedPspl"],
-0xF122: ["DFF_msofbtUDefProp"],
+0xF122: ["DFF_msofbtUDefProp", record.Property],
0xF005: ["DFF_msofbtSolverContainer"],
0xF012: ["DFF_msofbtConnectorRule"],
0xF013: ["DFF_msofbtAlignRule"],
@@ -244,7 +244,7 @@
def __getDirectoryObj (self):
obj = self.header.getDirectory()
- if obj == None:
+ if obj is None:
return None
obj.parseDirEntries()
return obj
@@ -252,14 +252,14 @@
def printDirectory (self):
obj = self.__getDirectoryObj()
- if obj == None:
+ if obj is None:
return
obj.output()
def getDirectoryNames (self):
obj = self.__getDirectoryObj()
- if obj == None:
+ if obj is None:
return
return obj.getDirectoryNames()
@@ -267,7 +267,7 @@
def getDirectoryStreamByName (self, name):
obj = self.__getDirectoryObj()
bytes = []
- if obj != None:
+ if obj is not None:
bytes = obj.getRawStreamByName(name)
strm = PPTDirStream(bytes, self.params)
return strm
@@ -275,12 +275,13 @@
class PPTDirStream(object):
"""Represents one single powerpoint file subdirectory, like e.g. \"PowerPoint Document\"."""
- def __init__ (self, bytes, params, prefix=''):
+ def __init__ (self, bytes, params, prefix='', recordInfo=None):
self.bytes = bytes
self.size = len(self.bytes)
self.pos = 0
self.prefix = prefix
self.params = params
+ self.properties = {"recordInfo": recordInfo}
def readBytes (self, size=1):
@@ -305,8 +306,8 @@
def readRecords (self):
try:
- # read until data is exhausted
- while self.pos < self.size:
+ # read until data is exhausted (min record size: 8 bytes)
+ while self.pos+8 < self.size:
print("")
self.readRecord()
return True
@@ -336,6 +337,7 @@
print("")
if size > 0:
print("")
+ self.__printSep('-', 61, "%4.4Xh: "%recordType)
def readRecord (self):
@@ -348,19 +350,22 @@
self.printRecordHeader(startPos, recordInstance, recordVersion, recordType, size)
bytes = self.readBytes(size)
-
+
+ recordInfo = None
if recData.has_key(recordType) and len(recData[recordType]) >= 2:
- assert(recordVersion != 0x0F)
- # call special record handler, if any
- handler = recData[recordType][1](recordType, recordInstance, size, bytes, self.prefix)
+ recordInfo = recData[recordType]
+
+ if recordVersion == 0x0F:
+ # substream? recurse into that
+ subSubStrm = PPTDirStream(bytes, self.params, self.prefix+" ", recordInfo)
+ subSubStrm.readRecords()
+ elif recordInfo is not None:
+ handler = recordInfo[1](recordType, recordInstance, size, bytes, self.properties, self.prefix)
print("")
- if handler != None:
+ # call special record handler, if any
+ if handler is not None:
handler.output()
self.printRecordDump(bytes, recordType)
- elif recordVersion == 0x0F:
- # substream? recurse into that
- subSubStrm = PPTDirStream(bytes, self.params, self.prefix+" ")
- subSubStrm.readRecords()
elif size > 0:
print("")
self.printRecordDump(bytes, recordType)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]