00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #ifndef __MSPUBPARSER_H__
00031 #define __MSPUBPARSER_H__
00032
00033 #include <map>
00034 #include <set>
00035 #include <vector>
00036 #include <memory>
00037
00038 #include <boost/shared_ptr.hpp>
00039 #include <boost/optional.hpp>
00040
00041 #include <libwpd/libwpd.h>
00042 #include <libwpg/libwpg.h>
00043
00044 #include "MSPUBTypes.h"
00045 #include "Fill.h"
00046 #include "Coordinate.h"
00047
00048 class WPXInputStream;
00049
00050 namespace libmspub
00051 {
00052 class MSPUBCollector;
// Unary predicate over ContentChunkReference: matches a reference whose
// seqNum field equals the number supplied at construction.
00053 class FindBySeqNum
00054 {
// Sequence number that operator() compares against.
00055 unsigned seqNum;
00056 public:
// Stores sn as the sequence number to match.
00057 FindBySeqNum(unsigned sn) : seqNum(sn) { }
// Returns true when ref.seqNum equals the stored sequence number.
// Declared const because the call only reads seqNum; this lets the
// predicate be invoked through a const object or const reference as well.
00058 bool operator()(const libmspub::ContentChunkReference &ref) const
00059 {
00060 return ref.seqNum == seqNum;
00061 }
00062 };
00063
// Unary predicate over ContentChunkReference: matches a reference whose
// parentSeqNum field equals the number supplied at construction.
00064 class FindByParentSeqNum
00065 {
// Parent sequence number that operator() compares against.
00066 unsigned seqNum;
00067 public:
// Stores sn as the parent sequence number to match.
00068 FindByParentSeqNum(unsigned sn) : seqNum(sn) { }
// Returns true when ref.parentSeqNum equals the stored number.
// Declared const because the call only reads seqNum; this lets the
// predicate be invoked through a const object or const reference as well.
00069 bool operator()(const libmspub::ContentChunkReference &ref) const
00070 {
00071 return ref.parentSeqNum == seqNum;
00072 }
00073 };
00074
00075
// Parser class declared with a WPXInputStream input and an MSPUBCollector.
// The destructor, parse(), getColorIndexByQuillEntry() and parseContents()
// are virtual, so the class can be used as a polymorphic base.
// NOTE(review): only declarations are visible in this header; the per-member
// notes below are derived from names and signatures and should be confirmed
// against the implementation file.
00076 class MSPUBParser
00077 {
00078 public:
// Constructs a parser from an input stream pointer and a collector pointer.
// Ownership of the two pointers is not expressed by this declaration.
00079 explicit MSPUBParser(WPXInputStream *input, MSPUBCollector *collector);
00080 virtual ~MSPUBParser();
// Public entry point; returns bool (presumably success/failure -- confirm).
00081 virtual bool parse();
// Everything from here to the end of the class is protected: there is no
// private section, so derived classes can reach all helpers and data members.
00082 protected:
// Overridable lookup from a Quill entry number to a color index
// (semantics inferred from the name -- confirm).
00083 virtual unsigned getColorIndexByQuillEntry(unsigned entry);
00084
// Value type bundling two unsigned short positions (first, last) with a
// CharacterStyle; the constructor copies its arguments into the fields.
00085 struct TextSpanReference
00086 {
00087 TextSpanReference(unsigned short f, unsigned short l, CharacterStyle cs) : first(f), last(l), charStyle(cs) { }
00088 unsigned short first;
00089 unsigned short last;
00090 CharacterStyle charStyle;
00091 };
00092
// Value type bundling two unsigned short positions (first, last) with a
// ParagraphStyle; the constructor copies its arguments into the fields.
00093 struct TextParagraphReference
00094 {
00095 TextParagraphReference(unsigned short f, unsigned short l, ParagraphStyle ps) : first(f), last(l), paraStyle(ps) { }
00096 unsigned short first;
00097 unsigned short last;
00098 ParagraphStyle paraStyle;
00099 };
00100
// Shorthand for a const iterator over a vector of ContentChunkReference.
00101 typedef std::vector<ContentChunkReference>::const_iterator ccr_iterator_t;
00102
// Default constructor, copy constructor and copy assignment are declared in
// the protected section, so they are not callable from outside the hierarchy.
// NOTE(review): presumably intentionally left undefined to forbid copying --
// confirm in the implementation file.
00103 MSPUBParser();
00104 MSPUBParser(const MSPUBParser &);
00105 MSPUBParser &operator=(const MSPUBParser &);
// Stream-level parsing steps, each taking the stream to read from and
// returning bool. parseContents is virtual; the others are not.
00106 virtual bool parseContents(WPXInputStream *input);
00107 bool parseQuill(WPXInputStream *input);
00108 bool parseEscher(WPXInputStream *input);
00109 bool parseEscherDelay(WPXInputStream *input);
00110
// Readers that return a descriptor object by value. skipHierarchicalData
// defaults to false.
00111 MSPUBBlockInfo parseBlock(WPXInputStream *input, bool skipHierarchicalData = false);
00112 EscherContainerInfo parseEscherContainer(WPXInputStream *input);
00113
// Chunk-, shape-, palette- and style-level helpers. Note that MSPUBBlockInfo
// arguments are taken by value while chunk/container references are taken by
// const reference.
00114 bool parseContentChunkReference(WPXInputStream *input, MSPUBBlockInfo block);
00115 QuillChunkReference parseQuillChunkReference(WPXInputStream *input);
00116 bool parseDocumentChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00117 bool parsePageChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00118 bool parsePaletteChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00119 bool parseShapes(WPXInputStream *input, MSPUBBlockInfo block, unsigned pageSeqNum);
00120 bool parseShape(WPXInputStream *input, unsigned seqNum, unsigned pageSeqNum, bool parseWithoutDimensions, bool isGroup);
00121 void parsePaletteEntry(WPXInputStream *input, MSPUBBlockInfo block);
00122 void parseColors(WPXInputStream *input, const QuillChunkReference &chunk);
00123 void parseFonts(WPXInputStream *input, const QuillChunkReference &chunk);
00124 void parseDefaultStyle(WPXInputStream *input, const QuillChunkReference &chunk);
// parseShapeGroup receives both Coordinate arguments by value, whereas
// parseEscherShape (below) receives them by non-const reference and may
// therefore modify the caller's objects.
00125 void parseShapeGroup(WPXInputStream *input, const EscherContainerInfo &spgr, bool topLevel, Coordinate parentCoordinateSystem, Coordinate parentGroupAbsoluteCoord);
00126 void skipBlock(WPXInputStream *input, MSPUBBlockInfo block);
00127 void parseEscherShape(WPXInputStream *input, const EscherContainerInfo &sp, bool topLevel, Coordinate &parentCoordinateSystem, Coordinate &parentGroupAbsoluteCoord);
// Container searches: the result is written to the `out` reference and the
// bool return presumably reports whether a match was found -- confirm.
00128 bool findEscherContainer(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, unsigned short type);
00129 bool findEscherContainerWithTypeInSet(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, std::set<unsigned short> types);
// Returns a map from unsigned short keys to unsigned values for a record.
00130 std::map<unsigned short, unsigned> extractEscherValues(WPXInputStream *input, const EscherContainerInfo &record);
00131 std::vector<TextSpanReference> parseCharacterStyles(WPXInputStream *input, const QuillChunkReference &chunk);
00132 std::vector<TextParagraphReference> parseParagraphStyles(WPXInputStream *input, const QuillChunkReference &chunk);
// getColorIndex returns a signed int while getFontIndex returns unsigned.
// NOTE(review): a negative color index may be used as a sentinel -- confirm.
00133 int getColorIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
00134 unsigned getFontIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
// inStsh defaults to false.
00135 CharacterStyle getCharacterStyle(WPXInputStream *input, bool inStsh = false);
00136 ParagraphStyle getParagraphStyle(WPXInputStream *input);
// Builds a Fill from a map of unsigned short keys to unsigned values and
// reports an additional flag through the skipIfNotBg out-parameter.
00137 boost::shared_ptr<Fill> getNewFill(const std::map<unsigned short, unsigned> &foptValues, bool &skipIfNotBg);
00138
// Data members. m_input and m_collector are raw pointers; whether the parser
// owns them is not visible from this header.
00139 WPXInputStream *m_input;
00140 MSPUBCollector *m_collector;
00141 std::vector<MSPUBBlockInfo> m_blockInfo;
00142 std::vector<ContentChunkReference> m_contentChunks;
// Vectors of unsigned indices, one per chunk category.
// NOTE(review): presumably indices into m_contentChunks -- confirm.
00143 std::vector<unsigned> m_pageChunkIndices;
00144 std::vector<unsigned> m_shapeChunkIndices;
00145 std::vector<unsigned> m_paletteChunkIndices;
00146 std::vector<unsigned> m_unknownChunkIndices;
// Optional index: empty until a value is assigned.
00147 boost::optional<unsigned> m_documentChunkIndex;
00148 int m_lastSeenSeqNum;
00149 unsigned m_lastAddedImage;
00150 std::vector<int> m_alternateShapeSeqNums;
00151 std::vector<int> m_escherDelayIndices;
00152
// Static helpers: they take only plain values (or a pointer) and do not
// depend on any parser instance.
00153 static short getBlockDataLength(unsigned type);
00154 static bool isBlockDataString(unsigned type);
00155 static PageType getPageTypeBySeqNum(unsigned seqNum);
00156 static unsigned getEscherElementTailLength(unsigned short type);
00157 static unsigned getEscherElementAdditionalHeaderLength(unsigned short type);
00158 static ImgType imgTypeByBlipType(unsigned short type);
00159 static int getStartOffset(ImgType type, unsigned short initial);
00160 static bool lineExistsByFlagPointer(unsigned *);
00161 };
00162
00163 }
00164
00165 #endif // __MSPUBPARSER_H__
00166