MsWrdText.hxx
Go to the documentation of this file.
00001 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
00002 
00003 /* libmwaw
00004 * Version: MPL 2.0 / LGPLv2+
00005 *
00006 * The contents of this file are subject to the Mozilla Public License Version
00007 * 2.0 (the "License"); you may not use this file except in compliance with
00008 * the License or as specified alternatively below. You may obtain a copy of
00009 * the License at http://www.mozilla.org/MPL/
00010 *
00011 * Software distributed under the License is distributed on an "AS IS" basis,
00012 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013 * for the specific language governing rights and limitations under the
00014 * License.
00015 *
00016 * Major Contributor(s):
00017 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
00018 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
00019 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
00020 * Copyright (C) 2006, 2007 Andrew Ziem
00021 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
00022 *
00023 *
00024 * All Rights Reserved.
00025 *
00026 * For minor contributions see the git repository.
00027 *
00028 * Alternatively, the contents of this file may be used under the terms of
00029 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
00030 * in which case the provisions of the LGPLv2+ are applicable
00031 * instead of those above.
00032 */
00033 
00034 /*
00035  * Parser to Microsoft Word text document
00036  *
00037  */
00038 #ifndef MS_WRD_MWAW_TEXT
00039 #  define MS_WRD_MWAW_TEXT
00040 
00041 #include <map>
00042 #include <string>
00043 #include <vector>
00044 
00045 #include "libmwaw_internal.hxx"
00046 
00047 #include "MWAWEntry.hxx"
00048 
00049 #include "MWAWDebug.hxx"
00050 
00051 #include "MsWrdTextStyles.hxx"
00052 
// Forward declarations of the internal types used by MsWrdText; their
// definitions are not part of this header.
namespace MsWrdTextInternal
{
struct State;
struct Table;
}

// Forward declarations of the collaborating types; this header only uses
// them in member function signatures, pointers, references and smart
// pointers.
struct MsWrdEntry;
class MsWrdParser;
class MsWrdTextStyles;
00062 
00064 class MsWrdText
00065 {
00066   friend class MsWrdParser;
00067   friend class MsWrdTextStyles;
00068 public:
00070   struct PLC {
00071     enum Type { TextPosition, HeaderFooter, Page, Section, ParagraphInfo, Paragraph, Font, Footnote, FootnoteDef, Field, Object };
00072     PLC(Type type, int id=0) : m_type(type), m_id(id), m_extra("")
00073     {
00074     }
00076     friend std::ostream &operator<<(std::ostream &o, PLC const &plc);
00078     struct ltstr {
00079       bool operator()(PLC const &s1, PLC const &s2) const
00080       {
00081         if (s1.m_type != s2.m_type)
00082           return int(s1.m_type) < int(s2.m_type);
00083         if (s1.m_id != s2.m_id)
00084           return s1.m_id < s2.m_id;
00085         return false;
00086       }
00087     };
00089     Type m_type;
00091     int m_id;
00093     std::string m_extra;
00094   };
00095 public:
00097   MsWrdText(MsWrdParser &parser);
00099   virtual ~MsWrdText();
00100 
00102   int version() const;
00103 
00105   int numPages() const;
00106 
00108   MWAWEntry getHeader() const;
00109 
00111   MWAWEntry getFooter() const;
00112 protected:
00114   shared_ptr<MWAWParserState> &getParserState()
00115   {
00116     return m_parserState;
00117   }
00118 
00120   bool sendMainText();
00121 
00123   bool sendText(MWAWEntry const &textEntry, bool mainZone, bool tableCell=false);
00125   bool sendSection(int sectionId);
00127   bool readHeaderTextLength();
00128 
00130   bool createZones(long bot);
00131 
00133   bool readTextStruct(MsWrdEntry &entry);
00134 
00136   bool readPageBreak(MsWrdEntry &entry);
00137 
00139   bool readParagraphInfo(MsWrdEntry entry);
00140 
00142   bool readFields(MsWrdEntry &entry, std::vector<long> const &fieldPos);
00143 
00145   bool sendFieldComment(int id);
00146 
00148   bool readFootnotesPos(MsWrdEntry &entry, std::vector<long> const &noteDef);
00149 
00151   bool readFootnotesData(MsWrdEntry &entry);
00152 
00154   bool sendFootnote(int id);
00155 
00157   bool readFontNames(MsWrdEntry &entry);
00158 
00160   void flushExtra();
00161 
00163   bool sendTable(MsWrdTextInternal::Table const &table);
00164 
00165   // interface with MsWrdTextStyles
00166 
00168   long getMainTextLength() const;
00170   std::multimap<long, MsWrdText::PLC> &getTextPLCMap();
00172   std::multimap<long, MsWrdText::PLC> &getFilePLCMap();
00173 
00174   //
00175   // low level
00176   //
00177 
00179   void prepareData();
00180 
00182   void prepareLines();
00184   void convertFilePLCPos();
00186   void prepareParagraphProperties();
00188   void prepareFontProperties();
00189 
00191   void prepareTableLimits();
00193   bool updateTableBeginnningAt(long cPos, long &nextCPos);
00194 
00196   bool readLongZone(MsWrdEntry &entry, int sz, std::vector<long> &list);
00197 
00198 private:
00199   MsWrdText(MsWrdText const &orig);
00200   MsWrdText &operator=(MsWrdText const &orig);
00201 
00202 protected:
00203   //
00204   // data
00205   //
00207   MWAWParserStatePtr m_parserState;
00208 
00210   shared_ptr<MsWrdTextInternal::State> m_state;
00211 
00213   shared_ptr<MsWrdTextStyles> m_stylesManager;
00214 
00216   MsWrdParser *m_mainParser;
00217 };
00218 #endif
00219 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: