MSPUBParser.h
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* libmspub
00003  * Version: MPL 1.1 / GPLv2+ / LGPLv2+
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License or as specified alternatively below. You may obtain a copy of
00008  * the License at http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * Major Contributor(s):
00016  * Copyright (C) 2012 Brennan Vincent <brennanv@email.arizona.edu>
00017  * Copyright (C) 2012 Fridrich Strba <fridrich.strba@bluewin.ch>
00018  *
00019  * All Rights Reserved.
00020  *
00021  * For minor contributions see the git repository.
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPLv2+"), or
00025  * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
00026  * in which case the provisions of the GPLv2+ or the LGPLv2+ are applicable
00027  * instead of those above.
00028  */
00029 
00030 #ifndef __MSPUBPARSER_H__
00031 #define __MSPUBPARSER_H__
00032 
00033 #include <map>
00034 #include <set>
00035 #include <vector>
00036 #include <memory>
00037 
00038 #include <boost/shared_ptr.hpp>
00039 #include <boost/optional.hpp>
00040 
00041 #include <libwpd/libwpd.h>
00042 #include <libwpg/libwpg.h>
00043 
00044 #include "MSPUBTypes.h"
00045 #include "Fill.h"
00046 #include "Coordinate.h"
00047 
00048 class WPXInputStream;
00049 
00050 namespace libmspub
00051 {
00052 class MSPUBCollector;
00053 class FindBySeqNum
00054 {
        // Unary predicate over ContentChunkReference: yields true when the
        // reference's seqNum equals the value supplied at construction.
00055   unsigned seqNum;
00056 public:
        // sn: the sequence number to match against.
00057   FindBySeqNum(unsigned sn) : seqNum(sn) { }
        // const-qualified because the call only reads seqNum; this lets the
        // predicate be invoked through a const object or const reference.
00058   bool operator()(const libmspub::ContentChunkReference &ref) const
00059   {
00060     return ref.seqNum == seqNum;
00061   }
00062 };
00063 
00064 class FindByParentSeqNum
00065 {
        // Unary predicate over ContentChunkReference: yields true when the
        // reference's parentSeqNum equals the value supplied at construction.
00066   unsigned seqNum;
00067 public:
        // sn: the parent sequence number to match against.
00068   FindByParentSeqNum(unsigned sn) : seqNum(sn) { }
        // const-qualified because the call only reads seqNum; this lets the
        // predicate be invoked through a const object or const reference.
00069   bool operator()(const libmspub::ContentChunkReference &ref) const
00070   {
00071     return ref.parentSeqNum == seqNum;
00072   }
00073 };
00074 
00075 
00076 class MSPUBParser
00077 {
        // Parser class declaration. The public surface is the two-argument
        // constructor, a virtual destructor and the virtual parse() entry point;
        // everything else is protected. All member functions are defined outside
        // this header.
00078 public:
        // Takes the input stream and the collector as raw pointers.
        // NOTE(review): presumably these are stored in m_input / m_collector
        // without taking ownership -- confirm in the implementation.
00079   explicit MSPUBParser(WPXInputStream *input, MSPUBCollector *collector);
00080   virtual ~MSPUBParser();
        // Entry point; the meaning of the bool result is defined by the
        // implementation (not visible here).
00081   virtual bool parse();
00082 protected:
00083   virtual unsigned getColorIndexByQuillEntry(unsigned entry);
00084 
        // A [first, last] pair of unsigned short values together with a
        // CharacterStyle. NOTE(review): first/last are presumably text offsets
        // delimiting the span -- confirm against the code that fills them.
00085   struct TextSpanReference
00086   {
00087     TextSpanReference(unsigned short f, unsigned short l, CharacterStyle cs) : first(f), last(l), charStyle(cs) { }
00088     unsigned short first;
00089     unsigned short last;
00090     CharacterStyle charStyle;
00091   };
00092 
        // Same shape as TextSpanReference, but carrying a ParagraphStyle.
00093   struct TextParagraphReference
00094   {
00095     TextParagraphReference(unsigned short f, unsigned short l, ParagraphStyle ps) : first(f), last(l), paraStyle(ps) { }
00096     unsigned short first;
00097     unsigned short last;
00098     ParagraphStyle paraStyle;
00099   };
00100 
        // Read-only iterator over a vector of ContentChunkReference.
00101   typedef std::vector<ContentChunkReference>::const_iterator ccr_iterator_t;
00102 
        // Default constructor, copy constructor and copy assignment are declared
        // in the protected section. NOTE(review): presumably left undefined to
        // forbid default construction and copying -- confirm in the .cpp.
00103   MSPUBParser();
00104   MSPUBParser(const MSPUBParser &);
00105   MSPUBParser &operator=(const MSPUBParser &);
        // Stream-level parsing steps; each receives the stream to read from and
        // returns bool. parseContents is virtual, the others are not.
00106   virtual bool parseContents(WPXInputStream *input);
00107   bool parseQuill(WPXInputStream *input);
00108   bool parseEscher(WPXInputStream *input);
00109   bool parseEscherDelay(WPXInputStream *input);
00110 
        // Readers that return a descriptor by value. skipHierarchicalData
        // defaults to false.
00111   MSPUBBlockInfo parseBlock(WPXInputStream *input, bool skipHierarchicalData = false);
00112   EscherContainerInfo parseEscherContainer(WPXInputStream *input);
00113 
        // Per-chunk / per-record parsing helpers. Note that several take
        // MSPUBBlockInfo by value while chunk references are taken by const
        // reference.
00114   bool parseContentChunkReference(WPXInputStream *input, MSPUBBlockInfo block);
00115   QuillChunkReference parseQuillChunkReference(WPXInputStream *input);
00116   bool parseDocumentChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00117   bool parsePageChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00118   bool parsePaletteChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00119   bool parseShapes(WPXInputStream *input, MSPUBBlockInfo block, unsigned pageSeqNum);
00120   bool parseShape(WPXInputStream *input, unsigned seqNum, unsigned pageSeqNum, bool parseWithoutDimensions, bool isGroup);
00121   void parsePaletteEntry(WPXInputStream *input, MSPUBBlockInfo block);
00122   void parseColors(WPXInputStream *input, const QuillChunkReference &chunk);
00123   void parseFonts(WPXInputStream *input, const QuillChunkReference &chunk);
00124   void parseDefaultStyle(WPXInputStream *input, const QuillChunkReference &chunk);
        // parseShapeGroup takes both Coordinate arguments by value, whereas
        // parseEscherShape takes them by non-const reference.
00125   void parseShapeGroup(WPXInputStream *input, const EscherContainerInfo &spgr, bool topLevel, Coordinate parentCoordinateSystem, Coordinate parentGroupAbsoluteCoord);
00126   void skipBlock(WPXInputStream *input, MSPUBBlockInfo block);
00127   void parseEscherShape(WPXInputStream *input, const EscherContainerInfo &sp, bool topLevel, Coordinate &parentCoordinateSystem, Coordinate &parentGroupAbsoluteCoord);
        // Container lookups: the result is written to `out`; the bool return
        // presumably reports whether a match was found -- TODO confirm.
        // The set of types is passed by value.
00128   bool findEscherContainer(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, unsigned short type);
00129   bool findEscherContainerWithTypeInSet(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, std::set<unsigned short> types);
        // Returns a map from unsigned short keys to unsigned values for `record`.
00130   std::map<unsigned short, unsigned> extractEscherValues(WPXInputStream *input, const EscherContainerInfo &record);
00131   std::vector<TextSpanReference> parseCharacterStyles(WPXInputStream *input, const QuillChunkReference &chunk);
00132   std::vector<TextParagraphReference> parseParagraphStyles(WPXInputStream *input, const QuillChunkReference &chunk);
        // getColorIndex returns a signed int while getFontIndex returns unsigned.
00133   int getColorIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
00134   unsigned getFontIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
        // inStsh defaults to false.
00135   CharacterStyle getCharacterStyle(WPXInputStream *input, bool inStsh = false);
00136   ParagraphStyle getParagraphStyle(WPXInputStream *input);
        // Returns a shared Fill built from the given values; skipIfNotBg is an
        // output flag set by the callee.
00137   boost::shared_ptr<Fill> getNewFill(const std::map<unsigned short, unsigned> &foptValues, bool &skipIfNotBg);
00138 
        // Parser state. m_input and m_collector are raw pointers.
00139   WPXInputStream *m_input;
00140   MSPUBCollector *m_collector;
00141   std::vector<MSPUBBlockInfo> m_blockInfo;
00142   std::vector<ContentChunkReference> m_contentChunks;
        // NOTE(review): the *ChunkIndices vectors presumably index into
        // m_contentChunks, grouped by chunk kind -- confirm in the implementation.
00143   std::vector<unsigned> m_pageChunkIndices;
00144   std::vector<unsigned> m_shapeChunkIndices;
00145   std::vector<unsigned> m_paletteChunkIndices;
00146   std::vector<unsigned> m_unknownChunkIndices;
        // Optional: empty until a value has been assigned.
00147   boost::optional<unsigned> m_documentChunkIndex;
00148   int m_lastSeenSeqNum;
00149   unsigned m_lastAddedImage;
00150   std::vector<int> m_alternateShapeSeqNums;
00151   std::vector<int> m_escherDelayIndices;
00152 
        // Static helpers: they take only their arguments and have no access to
        // instance state.
00153   static short getBlockDataLength(unsigned type);
00154   static bool isBlockDataString(unsigned type);
00155   static PageType getPageTypeBySeqNum(unsigned seqNum);
00156   static unsigned getEscherElementTailLength(unsigned short type);
00157   static unsigned getEscherElementAdditionalHeaderLength(unsigned short type);
00158   static ImgType imgTypeByBlipType(unsigned short type);
00159   static int getStartOffset(ImgType type, unsigned short initial);
        // Takes a pointer to non-const unsigned. NOTE(review): a null pointer
        // presumably means "flag absent" -- confirm against the definition.
00160   static bool lineExistsByFlagPointer(unsigned *);
00161 };
00162 
00163 } // namespace libmspub
00164 
00165 #endif // __MSPUBPARSER_H__
00166 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */