ooo-build r14877 - in trunk: . scratch/sd-pptutil scratch/sd-pptutil/src
- From: thorstenb svn gnome org
- To: svn-commits-list gnome org
- Subject: ooo-build r14877 - in trunk: . scratch/sd-pptutil scratch/sd-pptutil/src
- Date: Tue, 16 Dec 2008 23:02:29 +0000 (UTC)
Author: thorstenb
Date: Tue Dec 16 23:02:29 2008
New Revision: 14877
* scratch/sd-pptutil/*: blatant rip-off from Kohei's binary xls
dumper; this here is a very bare-bones PPT one (whose src/*
subdir I better consolidate with Kohei's ...)
trunk/scratch/sd-pptutil/ (contents, props changed)
Added: trunk/scratch/sd-pptutil/
--- (empty file)
+++ trunk/scratch/sd-pptutil/ Tue Dec 16 23:02:29 2008
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+import sys, os.path, getopt
+import ole, stream, globals
+from globals import error
def usage(exname):
    """Print the command-line help text to standard output.

    exname -- path the script was invoked as; only its base name is shown.
    """
    exname = os.path.basename(exname)
    # The option list mirrors the long options that main() hands to getopt.
    msg = """Usage: %s [options] [ppt file]

Options:
  --help                displays this help message.
  --debug               also dumps the raw bytes of parsed structures.
  --show-sector-chain   prints the sector chain of each directory entry.
""" % exname
    print(msg)
class PPTDumper(object):
    """Dump the contents of one PPT file to standard output.

    filepath -- path of the file to read.
    params   -- option holder that is passed on to the stream parser.
    """

    def __init__(self, filepath, params):
        self.filepath = filepath
        self.params = params

    def __printDirHeader(self, dirname, byteLen):
        """Print a banner with the stream name and its size in bytes."""
        dirname = globals.decodeName(dirname)
        print("")
        print("=" * 68)
        print("%s (size: %d bytes)" % (dirname, byteLen))
        print("-" * 68)

    def dump(self):
        """Read the whole file, print its structure, then dump every stream."""
        # Read everything up front; the handle is closed even if the stream
        # constructor raises.
        with open(self.filepath, 'rb') as infile:
            strm = stream.PPTStream(infile.read(), self.params)

        strm.printStreamInfo()
        strm.printHeader()
        strm.printDirectory()

        for dirname in strm.getDirectoryNames():
            if len(dirname) == 0 or dirname == 'Root Entry':
                continue
            dirstrm = strm.getDirectoryStreamByName(dirname)
            self.__printDirHeader(dirname, len(dirstrm.bytes))
            if dirname == "Revision Log":
                self.__readSubStream(dirstrm)
            else:
                # Everything else is shown as a hex dump, with a blank line
                # after every 512 bytes.
                globals.dumpBytes(dirstrm.bytes, 512)

    def __readSubStream(self, strm):
        """Read records from *strm* until one with header 0x000A is seen.

        Returns True when that record was reached, False when the stream
        signalled EndOfStream first.
        """
        try:
            # read bytes from BOF to EOF.
            header = 0x0000
            while header != 0x000A:
                header = strm.readRecord()
            return True
        except stream.EndOfStream:
            return False
def main(args):
    """Command-line entry point.

    args -- full argument vector; args[0] is the script name, the rest are
            options followed by the path of the file to dump.

    Prints the usage text and returns when the arguments are missing or
    cannot be parsed; otherwise dumps the first file named.
    """
    exname, args = args[0], args[1:]
    if len(args) < 1:
        print("takes at least one argument")
        usage(exname)
        return

    params = globals.Params()
    try:
        opts, args = getopt.getopt(args, "h", ["help", "debug", "show-sector-chain"])
    except getopt.GetoptError:
        error("error parsing input options\n")
        usage(exname)
        return

    for opt, arg in opts:
        if opt in ['-h', '--help']:
            usage(exname)
            return
        elif opt in ['--debug']:
            params.debug = True
        elif opt in ['--show-sector-chain']:
            params.showSectorChain = True
        else:
            error("unknown option %s\n" % opt)
            usage(exname)
            return

    if len(args) < 1:
        # Only options were given; there is no file to open.
        error("no input file given\n")
        usage(exname)
        return

    dumper = PPTDumper(args[0], params)
    dumper.dump()


if __name__ == '__main__':
    main(sys.argv)
Added: trunk/scratch/sd-pptutil/src/globals.py
--- (empty file)
+++ trunk/scratch/sd-pptutil/src/globals.py Tue Dec 16 23:02:29 2008
@@ -0,0 +1,220 @@
+import sys, struct, math
class ByteConvertError(Exception):
    """Raised by the integer decoders for byte counts they cannot unpack."""
class Params(object):
    """Holder for the switches chosen on the command line."""

    def __init__(self):
        # Both switches start off; the option parser turns them on.
        self.showSectorChain = False
        self.debug = False
def output(msg):
    """Write *msg* to standard output as is; no newline is appended."""
    out = sys.stdout
    out.write(msg)
def error(msg):
    """Write *msg* to standard error, prefixed with "Error: "."""
    text = "".join(("Error: ", msg))
    sys.stderr.write(text)
def decodeName(name):
    """Return *name* with low control characters made visible.

    Every character whose code is 20 or below is replaced with "<XXh>",
    where XX is its two-digit upper-case hex code.  An empty name is
    returned unchanged.
    """
    if len(name) == 0:
        return name

    pieces = []
    for ch in name:
        code = ord(ch)
        # NOTE(review): the limit is decimal 20, not 0x20 -- confirm whether
        # every control character was meant to be escaped.
        if code <= 20:
            pieces.append("<%2.2Xh>" % code)
        else:
            pieces.append(ch)
    return ''.join(pieces)
def getRichText(bytes, textLen=None):
    """Parse a string of the rich-text format that Excel uses.

    bytes   -- indexable byte sequence that starts with the option-flags byte.
    textLen -- number of text bytes.  When None it is derived from the length
               of *bytes* minus the fields read here and the trailing extra
               data (4 bytes per format run plus the extended-info size).

    Returns a (text, totalByteLen) tuple.  16-bit strings are not decoded; a
    fixed placeholder text is returned for them.
    """
    flags = bytes[0]
    if not isinstance(flags, int):
        # Character-based sequences yield 1-char strings rather than ints.
        flags = ord(flags)
    is16Bit = flags & 0x01
    isFarEast = flags & 0x04
    isRich = flags & 0x08

    i = 1
    formatRuns = 0
    if isRich:
        formatRuns = getSignedInt(bytes[i:i+2])
        i += 2

    extInfo = 0
    if isFarEast:
        extInfo = getSignedInt(bytes[i:i+4])
        i += 4

    extraBytes = 0
    if textLen is None:
        extraBytes = formatRuns*4 + extInfo
        textLen = len(bytes) - extraBytes - i

    totalByteLen = i + textLen + extraBytes
    if is16Bit:
        return ("<16-bit strings not supported yet>", totalByteLen)

    text = toTextBytes(bytes[i:i+textLen])
    return (text, totalByteLen)
def dumpBytes(chars, subDivide=None):
    """Write a hex dump of *chars* to standard output, 16 bytes per line.

    chars     -- sequence of bytes (ints or 1-char strings).
    subDivide -- when given, a blank line is written after every
                 subDivide/16 full lines; values below 16 disable this.

    Each line starts with the zero-padded decimal offset of its first byte,
    and an extra space follows every fourth byte.  Nothing is written for an
    empty sequence.
    """
    charLen = len(chars)
    if charLen == 0:
        # no bytes to dump.
        return

    subDivideLine = None
    if subDivide is not None:
        # Floor division keeps this an int on every Python version.
        subDivideLine = subDivide // 16
        if subDivideLine < 1:
            # Fewer than one line per group would divide by zero below.
            subDivideLine = None

    labelWidth = int(math.ceil(math.log(charLen, 10)))
    fmt = "%%%d.%dd: " % (labelWidth, labelWidth)

    line = 0
    flushBytes = False
    for i in range(charLen):
        if (i+1) % 16 == 1:
            # print line header with seek position
            output(fmt % i)

        byte = chars[i]
        if not isinstance(byte, int):
            byte = ord(byte)
        output("%2.2X " % byte)
        flushBytes = True

        if (i+1) % 4 == 0:
            # put extra space at every 4 bytes.
            output(" ")

        if (i+1) % 16 == 0:
            output("\n")
            flushBytes = False
            if subDivideLine is not None and (line+1) % subDivideLine == 0:
                output("\n")
            line += 1

    if flushBytes:
        # The last line was incomplete and still needs its line break.
        output("\n")
def getSectorPos(secID, secSize):
    """Return the byte offset of sector *secID* for sectors of *secSize* bytes.

    Sectors are counted from offset 512, so sector 0 starts at byte 512.
    """
    offset = secID * secSize
    return 512 + offset
def getRawBytes(bytes, spaced=True, reverse=True):
    """Format *bytes* as two-digit upper-case hex values.

    bytes   -- iterable of ints or 1-char strings.
    spaced  -- separate the values with a single space when true.
    reverse -- emit the values last-to-first when true.

    Returns the formatted string; an empty input gives an empty string.
    """
    hexValues = []
    for b in bytes:
        if isinstance(b, (str, type(b''))):
            b = ord(b)
        hexValues.append("%2.2X" % b)

    if reverse:
        hexValues.reverse()

    separator = ""
    if spaced:
        separator = " "
    return separator.join(hexValues)
def getTextBytes(bytes):
    """Alias of toTextBytes(); returns whatever that function returns."""
    converted = toTextBytes(bytes)
    return converted
def toTextBytes(bytes):
    """Join a sequence of byte values into one byte string.

    bytes -- iterable whose items are ints (0-255), 1-byte strings, or text
             characters in the Latin-1 range.

    Returns a byte string suitable for struct.unpack(); this is a plain
    `str` on Python 2.
    """
    byteType = type(b'')
    chunks = []
    for b in bytes:
        if isinstance(b, int):
            b = struct.pack('B', b)
        elif not isinstance(b, byteType):
            # Text character: map code points 0-255 straight to bytes.
            b = b.encode('latin-1')
        chunks.append(b)
    return b''.join(chunks)
def getSignedInt(bytes):
    """Decode 1, 2 or 4 little-endian bytes as a signed integer.

    Returns 0 for an empty sequence and raises ByteConvertError for any
    other length.
    """
    size = len(bytes)
    if size == 0:
        return 0

    packed = toTextBytes(bytes)
    # byte count -> struct format (byte, short, int/long); little endian
    formats = {1: 'b', 2: '<h', 4: '<l'}
    if size not in formats:
        raise ByteConvertError
    return struct.unpack(formats[size], packed)[0]
def getUnsignedInt(bytes):
    """Decode 1, 2 or 4 little-endian bytes as an unsigned integer.

    Returns 0 for an empty sequence and raises ByteConvertError for any
    other length.
    """
    size = len(bytes)
    if size == 0:
        return 0

    packed = toTextBytes(bytes)
    # byte count -> struct format (byte, short, int/long); little endian
    formats = {1: 'B', 2: '<H', 4: '<L'}
    if size not in formats:
        raise ByteConvertError
    return struct.unpack(formats[size], packed)[0]
def getFloat(bytes):
    """Decode *bytes* as a little-endian 'f' struct value; 0.0 if empty."""
    if not len(bytes):
        return 0.0
    (value,) = struct.unpack('<f', toTextBytes(bytes))
    return value
def getDouble(bytes):
    """Decode *bytes* as a little-endian 'd' struct value; 0.0 if empty."""
    if not len(bytes):
        return 0.0
    (value,) = struct.unpack('<d', toTextBytes(bytes))
    return value
def getUTF8FromUTF16(bytes):
    """Decode little-endian UTF-16 data into text, dropping NUL characters.

    bytes -- byte string, or a sequence of ints / 1-byte strings.

    A trailing odd byte is ignored.  NULs are removed rather than treated as
    a terminator, so zero padding in a fixed-size name buffer disappears.
    Returns a text string (`unicode` on Python 2).
    """
    byteType = type(b'')
    chunks = []
    for b in bytes:
        if isinstance(b, int):
            b = struct.pack('B', b)
        elif not isinstance(b, byteType):
            b = b.encode('latin-1')
        chunks.append(b)
    packed = b''.join(chunks)

    # Only whole 2-byte code units can be decoded.
    packed = packed[:len(packed) - len(packed) % 2]
    text = packed.decode('utf-16-le', 'replace')
    return text.replace('\x00', '')
Added: trunk/scratch/sd-pptutil/src/ole.py
--- (empty file)
+++ trunk/scratch/sd-pptutil/src/ole.py Tue Dec 16 23:02:29 2008
@@ -0,0 +1,732 @@
+import sys
+import stream, globals
+from globals import getSignedInt
+# ----------------------------------------------------------------------------
+# Reference: The Microsoft Compound Document File Format by Daniel Rentz
+# ----------------------------------------------------------------------------
+from globals import output
class NoRootStorage(Exception):
    """Raised when a short stream is requested but no root storage exists."""
class ByteOrder:
    """Constants for the byte order recorded in the document header."""
    LittleEndian, BigEndian, Unknown = 0, 1, 2
class BlockType:
    """Constants naming the structures whose first sector ID can be queried."""
    MSAT, SAT, SSAT, Directory = 0, 1, 2, 3
class StreamLocation:
    """Constants telling which allocation table holds a stream's chain."""
    SAT, SSAT = 0, 1
class Header(object):
    """Compound document header: parsing and reporting.

    The header is read from the first 512 bytes of the file image; sector
    *n* starts at byte 512 + n * sector size.  Sector sizes are stored as
    powers of two, so `secSize` and `secSizeShort` hold exponents.

    Note that the instance attribute `byteOrder` hides the static method of
    the same name on instances; call the method as Header.byteOrder(...).
    """

    @staticmethod
    def _byteValue(b):
        """Return *b* as an int whether it is an int or a 1-char string."""
        if isinstance(b, int):
            return b
        return ord(b)

    @staticmethod
    def byteOrder(chars):
        """Map the two byte-order marker bytes to a ByteOrder constant."""
        b1 = Header._byteValue(chars[0])
        b2 = Header._byteValue(chars[1])
        if b1 == 0xFE and b2 == 0xFF:
            return ByteOrder.LittleEndian
        elif b1 == 0xFF and b2 == 0xFE:
            return ByteOrder.BigEndian
        else:
            return ByteOrder.Unknown

    def __init__(self, bytes, params):
        """Store the file image; no parsing happens until parse() is called.

        bytes  -- the whole file content.
        params -- option holder; `debug` is read when printing.
        """
        self.bytes = bytes
        self.MSAT = None

        self.docId = None
        self.uId = None
        self.revision = 0
        self.version = 0
        self.byteOrder = ByteOrder.Unknown
        self.minStreamSize = 0

        self.numSecMSAT = 0
        self.numSecSSAT = 0
        self.numSecSAT = 0

        self.__secIDFirstMSAT = -2
        self.__secIDFirstDirStrm = -2
        self.__secIDFirstSSAT = -2

        # Exponents, not byte counts: 2**9 = 512 and 2**6 = 64, matching how
        # getSectorSize() and getShortSectorSize() interpret them.
        self.secSize = 9
        self.secSizeShort = 6

        self.params = params

    def getSectorSize(self):
        """Return the sector size in bytes."""
        return 2**self.secSize

    def getShortSectorSize(self):
        """Return the short-sector size in bytes."""
        return 2**self.secSizeShort

    def getFirstSectorID(self, blockType):
        """Return the first sector ID for *blockType*, or -2 if not tracked."""
        if blockType == BlockType.MSAT:
            return self.__secIDFirstMSAT
        elif blockType == BlockType.SSAT:
            return self.__secIDFirstSSAT
        elif blockType == BlockType.Directory:
            return self.__secIDFirstDirStrm
        return -2

    def output(self):
        """Print the parsed header fields; call parse() first."""

        def printRawBytes(bytes):
            for b in bytes:
                output("%2.2X " % Header._byteValue(b))
            output("\n")

        def printSep(c='-', w=68, prefix=''):
            print(prefix + c*w)

        printSep('=', 68)
        print("Compound Document Header")
        printSep('-', 68)

        if self.params.debug:
            globals.dumpBytes(self.bytes[0:512])
            printSep('-', 68)

        # document ID and unique ID
        output("Document ID: ")
        printRawBytes(self.docId)
        output("Unique ID: ")
        printRawBytes(self.uId)

        # revision and version
        print("Revision: %d Version: %d" % (self.revision, self.version))

        # byte order
        output("Byte order: ")
        if self.byteOrder == ByteOrder.LittleEndian:
            print("little endian")
        elif self.byteOrder == ByteOrder.BigEndian:
            print("big endian")
        else:
            print("unknown")

        # sector size (usually 512 bytes)
        print("Sector size: %d (%d)" % (2**self.secSize, self.secSize))

        # short sector size (usually 64 bytes)
        print("Short sector size: %d (%d)" % (2**self.secSizeShort, self.secSizeShort))

        # total number of sectors in SAT (equals the number of sector IDs
        # stored in the MSAT).
        print("Total number of sectors used in SAT: %d" % self.numSecSAT)

        print("Sector ID of the first sector of the directory stream: %d" %
              self.__secIDFirstDirStrm)

        print("Minimum stream size: %d" % self.minStreamSize)

        if self.__secIDFirstSSAT == -2:
            print("Sector ID of the first SSAT sector: [none]")
        else:
            print("Sector ID of the first SSAT sector: %d" % self.__secIDFirstSSAT)
            print("Total number of sectors used in SSAT: %d" % self.numSecSSAT)

        if self.__secIDFirstMSAT == -2:
            # There is no more sector ID stored outside the header.
            print("Sector ID of the first MSAT sector: [end of chain]")
        else:
            # There is more sector IDs than 109 IDs stored in the header.
            print("Sector ID of the first MSAT sector: %d" % (self.__secIDFirstMSAT))
            print("Total number of sectors used to store additional MSAT: %d" % self.numSecMSAT)

    def parse(self):
        """Decode the header fields and collect the MSAT sector IDs.

        Returns 512, the number of header bytes.
        """
        # document ID and unique ID
        self.docId = self.bytes[0:8]
        self.uId = self.bytes[8:24]

        # revision and version
        self.revision = getSignedInt(self.bytes[24:26])
        self.version = getSignedInt(self.bytes[26:28])

        # byte order
        self.byteOrder = Header.byteOrder(self.bytes[28:30])

        # sector size (usually 512 bytes)
        self.secSize = getSignedInt(self.bytes[30:32])

        # short sector size (usually 64 bytes)
        self.secSizeShort = getSignedInt(self.bytes[32:34])

        # total number of sectors in SAT (equals the number of sector IDs
        # stored in the MSAT).
        self.numSecSAT = getSignedInt(self.bytes[44:48])

        self.__secIDFirstDirStrm = getSignedInt(self.bytes[48:52])
        self.minStreamSize = getSignedInt(self.bytes[56:60])
        self.__secIDFirstSSAT = getSignedInt(self.bytes[60:64])
        self.numSecSSAT = getSignedInt(self.bytes[64:68])
        self.__secIDFirstMSAT = getSignedInt(self.bytes[68:72])
        self.numSecMSAT = getSignedInt(self.bytes[72:76])

        # master sector allocation table
        self.MSAT = MSAT(2**self.secSize, self.bytes, self.params)

        # First part of MSAT consisting of an array of up to 109 sector IDs.
        # Each sector ID is 4 bytes in length.
        for i in range(109):
            pos = 76 + i*4
            secID = getSignedInt(self.bytes[pos:pos+4])
            if secID == -1:
                break
            self.MSAT.appendSectorID(secID)

        # additional sectors are used to store more SAT sector IDs.
        secID = self.__secIDFirstMSAT
        size = self.getSectorSize()
        idsPerSector = size // 4
        visited = set()
        # A damaged file can link MSAT sectors in a circle; never read the
        # same sector twice.
        while secID >= 0 and secID not in visited:
            visited.add(secID)
            pos = 512 + secID*size
            sector = self.bytes[pos:pos+size]
            nextSecID = -2
            for i in range(idsPerSector):
                entryID = getSignedInt(sector[i*4:i*4+4])
                if entryID < 0:
                    # A negative ID ends the whole table.
                    break
                if i == idsPerSector - 1:
                    # last sector ID - points to the next MSAT sector.
                    nextSecID = entryID
                else:
                    self.MSAT.appendSectorID(entryID)
            secID = nextSecID

        return 512

    def getMSAT(self):
        """Return the MSAT object built by parse() (None before that)."""
        return self.MSAT

    def getSAT(self):
        """Return the SAT assembled from the MSAT's sector list."""
        return self.MSAT.getSAT()

    def getSSAT(self):
        """Build and return the SSAT, or None when the file has none."""
        ssatID = self.getFirstSectorID(BlockType.SSAT)
        if ssatID < 0:
            return None
        chain = self.getSAT().getSectorIDChain(ssatID)
        if len(chain) == 0:
            return None
        obj = SSAT(2**self.secSize, self.bytes, self.params)
        for secID in chain:
            obj.addSector(secID)
        obj.buildArray()
        return obj

    def getDirectory(self):
        """Build and return the Directory object, or None when absent.

        The returned object only knows its sectors; its entries are parsed
        on request.
        """
        dirID = self.getFirstSectorID(BlockType.Directory)
        if dirID < 0:
            return None
        chain = self.getSAT().getSectorIDChain(dirID)
        if len(chain) == 0:
            return None
        obj = Directory(self, self.params)
        for secID in chain:
            obj.addSector(secID)
        return obj

    def dummy(self):
        """Placeholder that does nothing."""
        pass
class MSAT(object):
    """Master Sector Allocation Table (MSAT)

    This class represents the master sector allocation table (MSAT) that
    stores sector IDs that point to all the sectors that are used by the
    sector allocation table (SAT).  The actual SAT are to be constructed by
    combining all the sectors pointed by the sector IDs in order of
    occurrence.
    """

    def __init__(self, sectorSize, bytes, params):
        self.sectorSize = sectorSize
        self.secIDs = []
        self.bytes = bytes
        self.__SAT = None
        self.params = params

    def appendSectorID(self, id):
        """Record one more SAT sector ID."""
        self.secIDs.append(id)

    def output(self):
        """Print every recorded sector ID with its byte position."""
        print('')
        print("="*68)
        print("Master Sector Allocation Table (MSAT)")
        print("-"*68)
        for secID in self.secIDs:
            print("sector ID: %5d (pos: %7d)" % (secID, 512+secID*self.sectorSize))

    def getSATSectorPosList(self):
        """Return a list of [sector ID, byte position] pairs."""
        return [[secID, 512 + secID*self.sectorSize] for secID in self.secIDs]

    def getSAT(self):
        """Return the SAT built from the recorded sectors.

        The SAT is built on the first call and the same object is returned
        afterwards.
        """
        if self.__SAT is not None:
            return self.__SAT

        obj = SAT(self.sectorSize, self.bytes, self.params)
        for secID in self.secIDs:
            obj.addSector(secID)
        obj.buildArray()
        self.__SAT = obj
        return self.__SAT
class SAT(object):
    """Sector Allocation Table (SAT)

    Holds, for every sector, the ID of the sector that follows it in its
    chain.  Negative entries are markers: -1 free, -2 end of chain, -3 SAT
    sector, -4 MSAT sector.
    """

    def __init__(self, sectorSize, bytes, params):
        self.sectorSize = sectorSize
        self.sectorIDs = []
        self.bytes = bytes
        self.array = []
        self.params = params

    def getSectorSize(self):
        return self.sectorSize

    def addSector(self, id):
        """Register a sector that holds part of this table."""
        self.sectorIDs.append(id)

    def buildArray(self):
        """Decode all registered sectors into `self.array` (done only once)."""
        if len(self.array) > 0:
            # array already built.
            return

        numItems = int(self.sectorSize/4)
        self.array = []
        for secID in self.sectorIDs:
            pos = 512 + secID*self.sectorSize
            for i in range(numItems):
                beginPos = pos + i*4
                self.array.append(getSignedInt(self.bytes[beginPos:beginPos+4]))

    def outputRawBytes(self):
        """Hex-dump the registered sectors back to back."""
        rawBytes = []
        for secID in self.sectorIDs:
            pos = 512 + secID*self.sectorSize
            rawBytes.extend(self.bytes[pos:pos+self.sectorSize])
        globals.dumpBytes(rawBytes, 512)

    def outputArrayStats(self):
        """Print how many table entries fall into each category."""
        live = free = endOfChain = satSectors = msatSectors = other = 0
        for item in self.array:
            if item >= 0:
                live += 1
            elif item == -1:
                free += 1
            elif item == -2:
                endOfChain += 1
            elif item == -3:
                satSectors += 1
            elif item == -4:
                msatSectors += 1
            else:
                other += 1

        print("total sector count: %4d" % len(self.array))
        print("* live sector count: %4d" % live)
        print("* end-of-chain sector count: %4d" % endOfChain)  # end-of-chain is also live
        print("* free sector count: %4d" % free)
        print("* SAT sector count: %4d" % satSectors)
        print("* MSAT sector count: %4d" % msatSectors)
        print("* other sector count: %4d" % other)

    def output(self):
        """Print the whole table followed by its statistics."""
        print('')
        print("="*68)
        print("Sector Allocation Table (SAT)")
        print("-"*68)
        if self.params.debug:
            self.outputRawBytes()
            print("-"*68)
            for i, item in enumerate(self.array):
                print("%5d: %5d" % (i, item))
            print("-"*68)
        self.outputArrayStats()

    def getSectorIDChain(self, initID):
        """Return the list of sector IDs in the chain starting at *initID*.

        The walk ends at the first negative entry.  It also ends when an ID
        points outside the table or back to a sector already in the chain,
        so a damaged table cannot cause an endless loop.  A negative
        *initID* gives an empty list.
        """
        if initID < 0:
            return []

        chain = []
        seen = set()
        nextID = initID
        while 0 <= nextID < len(self.array) and nextID not in seen:
            chain.append(nextID)
            seen.add(nextID)
            nextID = self.array[nextID]
        return chain
class SSAT(SAT):
    """Short Sector Allocation Table (SSAT)

    SSAT contains an array of sector ID chains of all short streams, as
    opposed to SAT which contains an array of sector ID chains of all
    standard streams.  The sector IDs included in the SSAT point to the
    short sectors in the short stream container stream.

    The first sector ID of SSAT is in the header, and the IDs of the
    remaining sectors are contained in the SAT as a sector ID chain.
    """

    def output(self):
        """Print the table (raw bytes first in debug mode) and its stats."""
        print('')
        print("="*68)
        print("Short Sector Allocation Table (SSAT)")
        print("-"*68)
        if self.params.debug:
            self.outputRawBytes()
            print("-"*68)
            for i, item in enumerate(self.array):
                output("%3d : %3d\n" % (i, item))
        self.outputArrayStats()
+class Directory(object):
+ """Directory Entries
+This stream contains a list of directory entries that are stored within the
+entire file stream.
+ class Type:
+ Empty = 0
+ UserStorage = 1
+ UserStream = 2
+ LockBytes = 3
+ Property = 4
+ RootStorage = 5
+ class NodeColor:
+ Red = 0
+ Black = 1
+ Unknown = 99
+ class Entry:
+ def __init__ (self):
+ self.Name = ''
+ self.CharBufferSize = 0
+ self.Type = Directory.Type.Empty
+ self.NodeColor = Directory.NodeColor.Unknown
+ self.DirIDLeft = -1
+ self.DirIDRight = -1
+ self.DirIDRoot = -1
+ self.UniqueID = None
+ self.UserFlags = None
+ self.TimeCreated = None
+ self.TimeModified = None
+ self.StreamSectorID = -2
+ self.StreamSize = 0
+ self.bytes = []
+ def __init__ (self, header, params):
+ self.sectorSize = header.getSectorSize()
+ self.bytes = header.bytes
+ self.minStreamSize = header.minStreamSize
+ self.sectorIDs = []
+ self.entries = []
+ self.SAT = header.getSAT()
+ self.SSAT = header.getSSAT()
+ self.header = header
+ self.RootStorage = None
+ self.RootStorageBytes = []
+ self.params = params
+ def __buildRootStorageBytes (self):
+ if self.RootStorage == None:
+ # no root storage exists.
+ return
+ firstSecID = self.RootStorage.StreamSectorID
+ chain = self.header.getSAT().getSectorIDChain(firstSecID)
+ for secID in chain:
+ pos = 512 + secID*self.sectorSize
+ self.RootStorageBytes.extend(self.header.bytes[pos:pos+self.sectorSize])
+ def __getRawStream (self, entry):
+ chain = []
+ if entry.StreamLocation == StreamLocation.SAT:
+ chain = self.header.getSAT().getSectorIDChain(entry.StreamSectorID)
+ elif entry.StreamLocation == StreamLocation.SSAT:
+ chain = self.header.getSSAT().getSectorIDChain(entry.StreamSectorID)
+ if entry.StreamLocation == StreamLocation.SSAT:
+ # Get the root storage stream.
+ if self.RootStorage == None:
+ raise NoRootStorage
+ bytes = []
+ self.__buildRootStorageBytes()
+ size = self.header.getShortSectorSize()
+ for id in chain:
+ pos = id*size
+ bytes.extend(self.RootStorageBytes[pos:pos+size])
+ return bytes
+ offset = 512
+ size = self.header.getSectorSize()
+ bytes = []
+ for id in chain:
+ pos = offset + id*size
+ bytes.extend(self.header.bytes[pos:pos+size])
+ return bytes
+ def getRawStreamByName (self, name):
+ bytes = []
+ for entry in self.entries:
+ if entry.Name == name:
+ bytes = self.__getRawStream(entry)
+ break
+ return bytes
+ def addSector (self, id):
+ self.sectorIDs.append(id)
+ def output (self, debug=False):
+ print('')
+ print("="*68)
+ print("Directory")
+ if debug:
+ print("-"*68)
+ print("sector(s) used:")
+ for secID in self.sectorIDs:
+ print(" sector %d"%secID)
+ print("")
+ for secID in self.sectorIDs:
+ print("-"*68)
+ print(" Raw Hex Dump (sector %d)"%secID)
+ print("-"*68)
+ pos = globals.getSectorPos(secID, self.sectorSize)
+ globals.dumpBytes(self.bytes[pos:pos+self.sectorSize], 128)
+ for entry in self.entries:
+ self.__outputEntry(entry, debug)
+ def __outputEntry (self, entry, debug):
+ print("-"*68)
+ if len(entry.Name) > 0:
+ name = entry.Name
+ if ord(name[0]) <= 5:
+ name = "<%2.2Xh>%s"%(ord(name[0]), name[1:])
+ print("name: %s (name buffer size: %d bytes)"%(name, entry.CharBufferSize))
+ else:
+ print("name: [empty] (name buffer size: %d bytes)"%entry.CharBufferSize)
+ if self.params.debug:
+ print("-"*68)
+ globals.dumpBytes(entry.bytes)
+ print("-"*68)
+ output("type: ")
+ if entry.Type == Directory.Type.Empty:
+ print("empty")
+ elif entry.Type == Directory.Type.LockBytes:
+ print("lock bytes")
+ elif entry.Type == Directory.Type.Property:
+ print("property")
+ elif entry.Type == Directory.Type.RootStorage:
+ print("root storage")
+ elif entry.Type == Directory.Type.UserStorage:
+ print("user storage")
+ elif entry.Type == Directory.Type.UserStream:
+ print("user stream")
+ else:
+ print("[unknown type]")
+ output("node color: ")
+ if entry.NodeColor == Directory.NodeColor.Red:
+ print("red")
+ elif entry.NodeColor == Directory.NodeColor.Black:
+ print("black")
+ elif entry.NodeColor == Directory.NodeColor.Unknown:
+ print("[unknown color]")
+ print("linked dir entries: left: %d; right: %d; root: %d"%
+ (entry.DirIDLeft, entry.DirIDRight, entry.DirIDRoot))
+ self.__outputRaw("unique ID", entry.UniqueID)
+ self.__outputRaw("user flags", entry.UserFlags)
+ self.__outputRaw("time created", entry.TimeCreated)
+ self.__outputRaw("time last modified", entry.TimeModified)
+ output("stream info: ")
+ if entry.StreamSectorID < 0:
+ print("[empty stream]")
+ else:
+ strmLoc = "SAT"
+ if entry.StreamLocation == StreamLocation.SSAT:
+ strmLoc = "SSAT"
+ print("(first sector ID: %d; size: %d; location: %s)"%
+ (entry.StreamSectorID, entry.StreamSize, strmLoc))
+ satObj = None
+ secSize = 0
+ if entry.StreamLocation == StreamLocation.SAT:
+ satObj = self.SAT
+ secSize = self.header.getSectorSize()
+ elif entry.StreamLocation == StreamLocation.SSAT:
+ satObj = self.SSAT
+ secSize = self.header.getShortSectorSize()
+ if satObj != None:
+ chain = satObj.getSectorIDChain(entry.StreamSectorID)
+ print("sector count: %d"%len(chain))
+ print("total sector size: %d"%(len(chain)*secSize))
+ if self.params.showSectorChain:
+ self.__outputSectorChain(chain)
+ def __outputSectorChain (self, chain):
+ line = "sector chain: "
+ lineLen = len(line)
+ for id in chain:
+ frag = "%d, "%id
+ fragLen = len(frag)
+ if lineLen + fragLen > 68:
+ print(line)
+ line = frag
+ lineLen = fragLen
+ else:
+ line += frag
+ lineLen += fragLen
+ if line[-2:] == ", ":
+ line = line[:-2]
+ lineLen -= 2
+ if lineLen > 0:
+ print(line)
+ def __outputRaw (self, name, bytes):
+ if bytes == None:
+ return
+ output("%s: "%name)
+ for byte in bytes:
+ output("%2.2X "%ord(byte))
+ print("")
+ def getDirectoryNames (self):
+ names = []
+ for entry in self.entries:
+ names.append(entry.Name)
+ return names
+ def parseDirEntries (self):
+ if len(self.entries):
+ # directory entries already built
+ return
+ # combine all sectors first.
+ bytes = []
+ for secID in self.sectorIDs:
+ pos = globals.getSectorPos(secID, self.sectorSize)
+ bytes.extend(self.bytes[pos:pos+self.sectorSize])
+ self.entries = []
+ # each directory entry is exactly 128 bytes.
+ numEntries = int(len(bytes)/128)
+ if numEntries == 0:
+ return
+ for i in xrange(0, numEntries):
+ pos = i*128
+ self.entries.append(self.parseDirEntry(bytes[pos:pos+128]))
+ def parseDirEntry (self, bytes):
+ entry = Directory.Entry()
+ entry.bytes = bytes
+ name = globals.getUTF8FromUTF16(bytes[0:64])
+ entry.Name = name
+ entry.CharBufferSize = getSignedInt(bytes[64:66])
+ entry.Type = getSignedInt(bytes[66:67])
+ entry.NodeColor = getSignedInt(bytes[67:68])
+ entry.DirIDLeft = getSignedInt(bytes[68:72])
+ entry.DirIDRight = getSignedInt(bytes[72:76])
+ entry.DirIDRoot = getSignedInt(bytes[76:80])
+ entry.UniqueID = bytes[80:96]
+ entry.UserFlags = bytes[96:100]
+ entry.TimeCreated = bytes[100:108]
+ entry.TimeModified = bytes[108:116]
+ entry.StreamSectorID = getSignedInt(bytes[116:120])
+ entry.StreamSize = getSignedInt(bytes[120:124])
+ entry.StreamLocation = StreamLocation.SAT
+ if entry.Type != Directory.Type.RootStorage and \
+ entry.StreamSize < self.header.minStreamSize:
+ entry.StreamLocation = StreamLocation.SSAT
+ if entry.Type == Directory.Type.RootStorage and entry.StreamSectorID >= 0:
+ # This is an existing root storage.
+ self.RootStorage = entry
+ return entry
Added: trunk/scratch/sd-pptutil/src/record.py
--- (empty file)
+++ trunk/scratch/sd-pptutil/src/record.py Tue Dec 16 23:02:29 2008
@@ -0,0 +1,81 @@
+import struct
+import globals
+# -------------------------------------------------------------------
+# record handler classes
class BaseRecordHandler(object):
    """Base class for record handlers.

    Keeps the record's header value, its bytes and a read cursor, and
    collects the text lines that output() prints.
    """

    def __init__(self, header, size, bytes, strmData):
        self.header = header
        self.size = size
        self.bytes = bytes
        self.lines = []
        self.pos = 0  # current byte position
        self.strmData = strmData

    def parseBytes(self):
        """Parse the original bytes and generate human readable output.

        The derived class should only worry about overwriting this function.
        The bytes are given as self.bytes, and call
        self.appendLine([new line]) to append a line to be displayed.
        """
        pass

    def output(self):
        """Parse the record, then print a separator and all collected lines."""
        self.parseBytes()
        print("%4.4Xh: %s" % (self.header, "-"*61))
        for line in self.lines:
            print("%4.4Xh: %s" % (self.header, line))

    def appendLine(self, line):
        self.lines.append(line)

    def appendLineBoolean(self, name, value):
        """Append "name: yes" or "name: no" depending on *value*."""
        self.appendLine("%s: %s" % (name, self.getYesNo(value)))

    def readBytes(self, length):
        """Return the next *length* bytes and advance the cursor by *length*.

        The cursor advances by the full amount even when fewer bytes remain.
        """
        start = self.pos
        self.pos = start + length
        return self.bytes[start:start+length]

    def readRemainingBytes(self):
        """Return everything after the cursor and move it to `self.size`."""
        rest = self.bytes[self.pos:]
        self.pos = self.size
        return rest

    def getCurrentPos(self):
        return self.pos

    def setCurrentPos(self, pos):
        self.pos = pos

    def getYesNo(self, boolVal):
        if boolVal:
            return 'yes'
        return 'no'

    def getTrueFalse(self, boolVal):
        if boolVal:
            return 'true'
        return 'false'

    def readUnsignedInt(self, length):
        """Read *length* bytes and decode them as an unsigned integer."""
        return globals.getUnsignedInt(self.readBytes(length))

    def readSignedInt(self, length):
        """Read *length* bytes and decode them as a signed integer."""
        return globals.getSignedInt(self.readBytes(length))

    def readDouble(self):
        """Read 8 bytes and decode them as a double."""
        # double is always 8 bytes.
        return globals.getDouble(self.readBytes(8))
Added: trunk/scratch/sd-pptutil/src/stream.py
--- (empty file)
+++ trunk/scratch/sd-pptutil/src/stream.py Tue Dec 16 23:02:29 2008
@@ -0,0 +1,347 @@
+import sys
+import ole, globals, record
+from globals import output
class EndOfStream(Exception):
    """Signals that a stream has no further data to read.

    Raised by the stream readers in this module when a read would run past
    the end of the available bytes, or when a record header of 0x0000 is
    encountered.
    """
    pass
# Record lookup table, keyed by record opcode.
# opcode: [canonical name, handler (optional)]
# The first list element is the name printed for the record; an optional
# second element is a handler class used to decode the record's bytes.
recData = {
    1: ["DFF_PST_SubContainerCompleted"],
    2: ["DFF_PST_IRRAtom"],
    3: ["DFF_PST_PSS"],
    4: ["DFF_PST_SubContainerException"],
    6: ["DFF_PST_ClientSignal1"],
    7: ["DFF_PST_ClientSignal2"],
    10: ["DFF_PST_PowerPointStateInfoAtom"],
    1000: ["DFF_PST_Document"],
    1001: ["DFF_PST_DocumentAtom"],
    1002: ["DFF_PST_EndDocument"],
    1003: ["DFF_PST_SlidePersist"],
    1004: ["DFF_PST_SlideBase"],
    1005: ["DFF_PST_SlideBaseAtom"],
    1006: ["DFF_PST_Slide"],
    1007: ["DFF_PST_SlideAtom"],
    1008: ["DFF_PST_Notes"],
    1009: ["DFF_PST_NotesAtom"],
    1010: ["DFF_PST_Environment"],
    1011: ["DFF_PST_SlidePersistAtom"],
    1012: ["DFF_PST_Scheme"],
    1013: ["DFF_PST_SchemeAtom"],
    1014: ["DFF_PST_DocViewInfo"],
    1015: ["DFF_PST_SslideLayoutAtom"],
    1016: ["DFF_PST_MainMaster"],
    1017: ["DFF_PST_SSSlideInfoAtom"],
    1018: ["DFF_PST_SlideViewInfo"],
    1019: ["DFF_PST_GuideAtom"],
    1020: ["DFF_PST_ViewInfo"],
    1021: ["DFF_PST_ViewInfoAtom"],
    1022: ["DFF_PST_SlideViewInfoAtom"],
    1023: ["DFF_PST_VBAInfo"],
    1024: ["DFF_PST_VBAInfoAtom"],
    1025: ["DFF_PST_SSDocInfoAtom"],
    1026: ["DFF_PST_Summary"],
    1027: ["DFF_PST_Texture"],
    1028: ["DFF_PST_VBASlideInfo"],
    1029: ["DFF_PST_VBASlideInfoAtom"],
    1030: ["DFF_PST_DocRoutingSlip"],
    1031: ["DFF_PST_OutlineViewInfo"],
    1032: ["DFF_PST_SorterViewInfo"],
    1033: ["DFF_PST_ExObjList"],
    1034: ["DFF_PST_ExObjListAtom"],
    1035: ["DFF_PST_PPDrawingGroup"],
    1036: ["DFF_PST_PPDrawing"],
    1040: ["DFF_PST_NamedShows"],
    1041: ["DFF_PST_NamedShow"],
    1042: ["DFF_PST_NamedShowSlides"],
    1055: ["DFF_PST_RoundTripShapeId"],
    2000: ["DFF_PST_List"],
    2005: ["DFF_PST_FontCollection"],
    2017: ["DFF_PST_ListPlaceholder"],
    2019: ["DFF_PST_BookmarkCollection"],
    2020: ["DFF_PST_SoundCollection"],
    2021: ["DFF_PST_SoundCollAtom"],
    2022: ["DFF_PST_Sound"],
    2023: ["DFF_PST_SoundData"],
    2025: ["DFF_PST_BookmarkSeedAtom"],
    2026: ["DFF_PST_GuideList"],
    2028: ["DFF_PST_RunArray"],
    2029: ["DFF_PST_RunArrayAtom"],
    2030: ["DFF_PST_ArrayElementAtom"],
    2031: ["DFF_PST_Int4ArrayAtom"],
    2032: ["DFF_PST_ColorSchemeAtom"],
    3008: ["DFF_PST_OEShape"],
    3009: ["DFF_PST_ExObjRefAtom"],
    3011: ["DFF_PST_OEPlaceholderAtom"],
    3020: ["DFF_PST_GrColor"],
    3025: ["DFF_PST_GrectAtom"],
    3031: ["DFF_PST_GratioAtom"],
    3032: ["DFF_PST_Gscaling"],
    3034: ["DFF_PST_GpointAtom"],
    3035: ["DFF_PST_OEShapeAtom"],
    3998: ["DFF_PST_OutlineTextRefAtom"],
    3999: ["DFF_PST_TextHeaderAtom"],
    4000: ["DFF_PST_TextCharsAtom"],
    4001: ["DFF_PST_StyleTextPropAtom"],
    4002: ["DFF_PST_BaseTextPropAtom"],
    4003: ["DFF_PST_TxMasterStyleAtom"],
    4004: ["DFF_PST_TxCFStyleAtom"],
    4005: ["DFF_PST_TxPFStyleAtom"],
    4006: ["DFF_PST_TextRulerAtom"],
    4007: ["DFF_PST_TextBookmarkAtom"],
    4008: ["DFF_PST_TextBytesAtom"],
    4009: ["DFF_PST_TxSIStyleAtom"],
    4010: ["DFF_PST_TextSpecInfoAtom"],
    4011: ["DFF_PST_DefaultRulerAtom"],
    4023: ["DFF_PST_FontEntityAtom"],
    4024: ["DFF_PST_FontEmbedData"],
    4025: ["DFF_PST_TypeFace"],
    4026: ["DFF_PST_CString"],
    4027: ["DFF_PST_ExternalObject"],
    4033: ["DFF_PST_MetaFile"],
    4034: ["DFF_PST_ExOleObj"],
    4035: ["DFF_PST_ExOleObjAtom"],
    4036: ["DFF_PST_ExPlainLinkAtom"],
    4037: ["DFF_PST_CorePict"],
    4038: ["DFF_PST_CorePictAtom"],
    4039: ["DFF_PST_ExPlainAtom"],
    4040: ["DFF_PST_SrKinsoku"],
    4041: ["DFF_PST_Handout"],
    4044: ["DFF_PST_ExEmbed"],
    4045: ["DFF_PST_ExEmbedAtom"],
    4046: ["DFF_PST_ExLink"],
    4047: ["DFF_PST_ExLinkAtom_old"],
    4048: ["DFF_PST_BookmarkEntityAtom"],
    4049: ["DFF_PST_ExLinkAtom"],
    4050: ["DFF_PST_SrKinsokuAtom"],
    4051: ["DFF_PST_ExHyperlinkAtom"],
    4053: ["DFF_PST_ExPlain"],
    4054: ["DFF_PST_ExPlainLink"],
    4055: ["DFF_PST_ExHyperlink"],
    4056: ["DFF_PST_SlideNumberMCAtom"],
    4057: ["DFF_PST_HeadersFooters"],
    4058: ["DFF_PST_HeadersFootersAtom"],
    4062: ["DFF_PST_RecolorEntryAtom"],
    4063: ["DFF_PST_TxInteractiveInfoAtom"],
    4065: ["DFF_PST_EmFormatAtom"],
    4066: ["DFF_PST_CharFormatAtom"],
    4067: ["DFF_PST_ParaFormatAtom"],
    4068: ["DFF_PST_MasterText"],
    4071: ["DFF_PST_RecolorInfoAtom"],
    4073: ["DFF_PST_ExQuickTime"],
    4074: ["DFF_PST_ExQuickTimeMovie"],
    4075: ["DFF_PST_ExQuickTimeMovieData"],
    4076: ["DFF_PST_ExSubscription"],
    4077: ["DFF_PST_ExSubscriptionSection"],
    4078: ["DFF_PST_ExControl"],
    4091: ["DFF_PST_ExControlAtom"],
    4080: ["DFF_PST_SlideListWithText"],
    4081: ["DFF_PST_AnimationInfoAtom"],
    4082: ["DFF_PST_InteractiveInfo"],
    4083: ["DFF_PST_InteractiveInfoAtom"],
    4084: ["DFF_PST_SlideList"],
    4085: ["DFF_PST_UserEditAtom"],
    4086: ["DFF_PST_CurrentUserAtom"],
    4087: ["DFF_PST_DateTimeMCAtom"],
    4088: ["DFF_PST_GenericDateMCAtom"],
    4089: ["DFF_PST_HeaderMCAtom"],
    4090: ["DFF_PST_FooterMCAtom"],
    4100: ["DFF_PST_ExMediaAtom"],
    4101: ["DFF_PST_ExVideo"],
    4102: ["DFF_PST_ExAviMovie"],
    4103: ["DFF_PST_ExMCIMovie"],
    4109: ["DFF_PST_ExMIDIAudio"],
    4110: ["DFF_PST_ExCDAudio"],
    4111: ["DFF_PST_ExWAVAudioEmbedded"],
    4112: ["DFF_PST_ExWAVAudioLink"],
    4113: ["DFF_PST_ExOleObjStg"],
    4114: ["DFF_PST_ExCDAudioAtom"],
    4115: ["DFF_PST_ExWAVAudioEmbeddedAtom"],
    4116: ["DFF_PST_AnimationInfo"],
    4117: ["DFF_PST_RTFDateTimeMCAtom"],
    5000: ["DFF_PST_ProgTags"],
    5001: ["DFF_PST_ProgStringTag"],
    5002: ["DFF_PST_ProgBinaryTag"],
    5003: ["DFF_PST_BinaryTagData"],
    6000: ["DFF_PST_PrintOptions"],
    6001: ["DFF_PST_PersistPtrFullBlock"],
    6002: ["DFF_PST_PersistPtrIncrementalBlock"],
    10000: ["DFF_PST_RulerIndentAtom"],
    10001: ["DFF_PST_GscalingAtom"],
    10002: ["DFF_PST_GrColorAtom"],
    10003: ["DFF_PST_GLPointAtom"],
    10004: ["DFF_PST_GlineAtom"],
    0xF000: ["DFF_msofbtDggContainer"],
    0xF006: ["DFF_msofbtDgg"],
    0xF016: ["DFF_msofbtCLSID"],
    0xF00B: ["DFF_msofbtOPT"],
    0xF11A: ["DFF_msofbtColorMRU"],
    0xF11E: ["DFF_msofbtSplitMenuColors"],
    0xF001: ["DFF_msofbtBstoreContainer"],
    0xF007: ["DFF_msofbtBSE"],
    0xF018: ["DFF_msofbtBlipFirst"],
    0xF117: ["DFF_msofbtBlipLast"],
    0xF002: ["DFF_msofbtDgContainer"],
    0xF008: ["DFF_msofbtDg"],
    0xF118: ["DFF_msofbtRegroupItems"],
    0xF120: ["DFF_msofbtColorScheme"],
    0xF003: ["DFF_msofbtSpgrContainer"],
    0xF004: ["DFF_msofbtSpContainer"],
    0xF009: ["DFF_msofbtSpgr"],
    0xF00A: ["DFF_msofbtSp"],
    0xF00C: ["DFF_msofbtTextbox"],
    0xF00D: ["DFF_msofbtClientTextbox"],
    0xF00E: ["DFF_msofbtAnchor"],
    0xF00F: ["DFF_msofbtChildAnchor"],
    0xF010: ["DFF_msofbtClientAnchor"],
    0xF011: ["DFF_msofbtClientData"],
    0xF11F: ["DFF_msofbtOleObject"],
    0xF11D: ["DFF_msofbtDeletedPspl"],
    0xF122: ["DFF_msofbtUDefProp"],
    0xF005: ["DFF_msofbtSolverContainer"],
    0xF012: ["DFF_msofbtConnectorRule"],
    0xF013: ["DFF_msofbtAlignRule"],
    0xF014: ["DFF_msofbtArcRule"],
    0xF015: ["DFF_msofbtClientRule"],
    0xF017: ["DFF_msofbtCalloutRule"],
    0xF119: ["DFF_msofbtSelection"],
}
class PPTStream(object):
    """Top-level view of a whole PPT file held in memory.

    Wraps the raw file content (`chars`), parses its header through
    ole.Header and gives access to the directory and to the individual
    directory streams as PPTDirStream objects.  printHeader() must run
    before the directory can be queried; until then the directory is
    treated as absent.
    """

    def __init__ (self, chars, params):
        self.chars = chars            # raw file content
        self.size = len(self.chars)
        self.pos = 0                  # set to the result of header.parse()
        self.version = None
        self.header = None            # ole.Header, created by printHeader()
        self.params = params          # passed through to Header/PPTDirStream

    def __printSep (self, c='-', w=68, prefix=''):
        """Print a separator line of `w` times `c`, preceded by `prefix`."""
        print(prefix + c*w)

    def printStreamInfo (self):
        """Print the banner with the total stream size."""
        self.__printSep('=', 68)
        print("PPT File Format Dumper by Kohei Yoshida & Thorsten Behrens")
        print(" total stream size: %d bytes"%self.size)
        self.__printSep('=', 68)
        print('')

    def printHeader (self):
        """Parse the file header, remember it in self.header and print it."""
        self.header = ole.Header(self.chars, self.params)
        self.pos = self.header.parse()
        self.header.output()

    def __getDirectoryObj (self):
        """Return the parsed directory object, or None if there is none.

        None is also returned when the header has not been parsed yet, so
        callers need not care about the call order.
        """
        if self.header is None:
            return None
        obj = self.header.getDirectory()
        if obj is None:
            return None
        # NOTE(review): entries are re-parsed on every call; presumably
        # parseDirEntries() is safe to repeat -- confirm in ole.
        obj.parseDirEntries()
        return obj

    def printDirectory (self):
        """Print the directory listing; does nothing without a directory."""
        obj = self.__getDirectoryObj()
        if obj is None:
            return
        obj.output()

    def getDirectoryNames (self):
        """Return the directory entry names.

        An empty list is returned when no directory is available, so the
        result can always be iterated.
        """
        obj = self.__getDirectoryObj()
        if obj is None:
            return []
        return obj.getDirectoryNames()

    def getDirectoryStreamByName (self, name):
        """Return a PPTDirStream over the raw bytes of the stream `name`.

        Without a directory the returned stream is empty.
        """
        obj = self.__getDirectoryObj()
        bytes = []
        if obj is not None:
            bytes = obj.getRawStreamByName(name)
        strm = PPTDirStream(bytes, self.params)
        return strm
class PPTDirStream(object):
    """Sequential reader over the raw bytes of one directory stream.

    `bytes` may be a byte string (elements are 1-character strings, as in
    Python 2), a Python 3 bytes object, or a list of integers; elements are
    normalised to integers on access.  self.pos is the read cursor.
    """

    def __init__ (self, bytes, params):
        self.bytes = bytes
        self.size = len(self.bytes)
        self.pos = 0
        self.params = params

    def __byteAt (self, pos):
        """Return the byte at `pos` as an integer."""
        b = self.bytes[pos]
        if isinstance(b, int):
            return b
        return ord(b)

    def readRaw (self, size=1):
        """Read `size` bytes as one little-endian unsigned integer.

        Raises EndOfStream, leaving the cursor untouched, when fewer than
        `size` bytes remain.
        """
        if self.size - self.pos < size:
            raise EndOfStream
        # PPT stores little endian
        value = 0
        for i in range(0, size):
            value += self.__byteAt(self.pos) << (8*i)
            self.pos += 1
        return value

    def readByteArray (self, size=1):
        """Read `size` bytes and return them as a list of integers.

        Raises EndOfStream when the stream ends first; the cursor is then
        left at the end of the stream.
        """
        values = []
        for i in range(0, size):
            if self.pos >= self.size:
                raise EndOfStream
            values.append(self.__byteAt(self.pos))
            self.pos += 1
        return values

    def __printSep (self, c='-', w=68, prefix=''):
        """Print a separator line of `w` times `c`, preceded by `prefix`."""
        print(prefix + c*w)

    def readRecord (self):
        """Read one record, print its name, size and hex dump.

        A record is read as a 2-byte header value, a 2-byte size and `size`
        payload bytes.  Returns the header value.  Raises EndOfStream when
        fewer than 4 bytes remain, when the header is 0x0000, or when the
        payload is truncated.
        """
        if self.size - self.pos < 4:
            raise EndOfStream
        pos = self.pos
        header = self.readRaw(2)
        if header == 0x0000:
            raise EndOfStream
        size = self.readRaw(2)
        payload = self.readByteArray(size)

        # record handler that parses the raw bytes and displays more
        # meaningful information.
        handler = None
        print("")
        self.__printSep('=', 61, "%4.4Xh: "%header)
        if header in recData:
            print("%4.4Xh: %s (%4.4Xh)"%
                  (header, recData[header][0], header))
            if len(recData[header]) >= 2:
                # NOTE(review): record.BaseRecordHandler.__init__ takes a
                # fourth argument (strmData) -- confirm what handlers
                # registered in recData expect before adding one.
                handler = recData[header][1](header, size, payload)
        else:
            print("%4.4Xh: [unknown record name] (%4.4Xh)"%(header, header))
        print("%4.4Xh: size = %d; pos = %d"%(header, size, pos))
        self.__printSep('-', 61, "%4.4Xh: "%header)

        # hex dump, 16 bytes per row, every row prefixed with the header
        for start in range(0, size, 16):
            output("%4.4Xh: "%header)
            for b in payload[start:start+16]:
                output("%2.2X "%b)
            print("")

        if handler is not None:
            # record handler exists. Parse the record and display more info.
            handler.output()
        return header
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]