Main Page | Class Hierarchy | Data Structures | File List | Data Fields | Globals

libofx-0.8.0/lib/ofx_preproc.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002           ofx_preproc.cpp 
00003                              -------------------
00004     copyright            : (C) 2002 by Benoit Grégoire
00005     email                : bock@step.polymtl.ca
00006 ***************************************************************************/
00012 /***************************************************************************
00013  *                                                                         *
00014  *   This program is free software; you can redistribute it and/or modify  *
00015  *   it under the terms of the GNU General Public License as published by  *
00016  *   the Free Software Foundation; either version 2 of the License, or     *
00017  *   (at your option) any later version.                                   *
00018  *                                                                         *
00019  ***************************************************************************/
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <string>
#include "ParserEventGeneratorKit.h"
#include "libofx.h"
#include "messages.hh"
#include "ofx_sgml.hh"
#include "ofc_sgml.hh"
#include "ofx_preproc.hh"
00031 
00032 using namespace std;
/** Size, in bytes, of the fixed character buffers used by the preprocessor. */
const unsigned int READ_BUFFER_SIZE = 1024;

/** Number of entries in DTD_SEARCH_PATH.  One extra slot is reserved when
 *  the build system supplies MAKEFILE_DTD_PATH. */
#ifdef MAKEFILE_DTD_PATH
const int DTD_SEARCH_PATH_NUM = 4;
#else
const int DTD_SEARCH_PATH_NUM = 3;
#endif

/** Directories probed, in order, by find_dtd().  Every entry ends with a
 *  path separator because find_dtd() appends the file name directly.
 *  NOTE(review): find_dtd() hands these strings verbatim to ifstream::open(),
 *  which performs no shell-style tilde expansion, so the last entry probably
 *  does not resolve to the user's home directory -- confirm intent. */
const char *DTD_SEARCH_PATH[DTD_SEARCH_PATH_NUM] = {
#ifdef MAKEFILE_DTD_PATH
  MAKEFILE_DTD_PATH,
#endif
  "/usr/local/share/libofx/dtd/",
  "/usr/share/libofx/dtd/",
  "~/"
};
00053 
00058 CFCT int ofx_proc_file(LibofxContextPtr ctx, const char * p_filename)
00059   {
00060   LibofxContext *libofx_context;
00061   bool ofx_start=false;
00062   bool ofx_end=false;
00063 
00064   ifstream input_file;
00065   ofstream tmp_file;
00066   char buffer[READ_BUFFER_SIZE];
00067   string s_buffer;
00068   char *filenames[3];
00069   char tmp_filename[50];
00070 
00071   libofx_context=(LibofxContext*)ctx;
00072 
00073   if(p_filename!=NULL&&strcmp(p_filename,"")!=0)
00074     {
00075     message_out(DEBUG, string("ofx_proc_file():Opening file: ")+ p_filename);
00076     
00077     input_file.open(p_filename);
00078     strncpy(tmp_filename,"/tmp/libofxtmpXXXXXX",50);
00079     mkstemp(tmp_filename);
00080     tmp_file.open(tmp_filename);
00081 
00082     message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00083     if(!input_file){
00084       message_out(ERROR,"ofx_proc_file():Unable to open the input file "+string(p_filename));
00085     }
00086     else if(!tmp_file){
00087       message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00088     }
00089     else
00090       {
00091 
00092         do {
00093           input_file.getline(buffer, sizeof(buffer),'\n');
00094           //cout<<buffer<<"\n";
00095           s_buffer.assign(buffer);
00096           //cout<<"input_file.gcount(): "<<input_file.gcount()<<" sizeof(buffer): "<<sizeof(buffer)<<endl;
00097           if(input_file.gcount()<(sizeof(buffer)-1))
00098             {
00099               s_buffer.append("\n");
00100             }
00101           else if( !input_file.eof()&&input_file.fail())
00102             {
00103               input_file.clear();
00104             }
00105           int ofx_start_idx;
00106           if (ofx_start==false &&
00107               (
00108                (libofx_context->currentFileType()==OFX&&
00109                 ((ofx_start_idx=s_buffer.find("<OFX>"))!=
00110                  string::npos||(ofx_start_idx=s_buffer.find("<ofx>"))!=string::npos))
00111                || (libofx_context->currentFileType()==OFC&&
00112                    ((ofx_start_idx=s_buffer.find("<OFC>"))!=string::npos||
00113                     (ofx_start_idx=s_buffer.find("<ofc>"))!=string::npos))
00114               )
00115              )
00116             {
00117               ofx_start=true;
00118               s_buffer.erase(0,ofx_start_idx);//Fix for really broken files that don't have a newline after the header.
00119               message_out(DEBUG,"ofx_proc_file():<OFX> or <OFC> has been found");
00120             }
00121 
00122           if(ofx_start==true && ofx_end==false){
00123             s_buffer=sanitize_proprietary_tags(s_buffer);
00124             //cout<< s_buffer<<"\n";
00125             tmp_file.write(s_buffer.c_str(), s_buffer.length());
00126           }
00127           
00128           if (ofx_start==true &&
00129               (
00130                (libofx_context->currentFileType()==OFX &&
00131                 ((ofx_start_idx=s_buffer.find("</OFX>"))!=string::npos ||
00132                  (ofx_start_idx=s_buffer.find("</ofx>"))!=string::npos))
00133                || (libofx_context->currentFileType()==OFC &&
00134                    ((ofx_start_idx=s_buffer.find("</OFC>"))!=string::npos ||
00135                     (ofx_start_idx=s_buffer.find("</ofc>"))!=string::npos))
00136               )
00137              )
00138             {
00139               ofx_end=true;
00140               message_out(DEBUG,"ofx_proc_file():</OFX> or </OFC>  has been found");
00141             }
00142 
00143         } while(!input_file.eof()&&!input_file.bad());
00144       }
00145     input_file.close();
00146     tmp_file.close();
00147 
00148     char filename_openspdtd[255];
00149     char filename_dtd[255];
00150     char filename_ofx[255];
00151     strncpy(filename_openspdtd,find_dtd(OPENSPDCL_FILENAME).c_str(),255);//The opensp sgml dtd file
00152     if(libofx_context->currentFileType()==OFX)
00153       {
00154         strncpy(filename_dtd,find_dtd(OFX160DTD_FILENAME).c_str(),255);//The ofx dtd file
00155       }
00156     else if(libofx_context->currentFileType()==OFC)
00157       {
00158         strncpy(filename_dtd,find_dtd(OFCDTD_FILENAME).c_str(),255);//The ofc dtd file
00159       }
00160     else
00161       {
00162         message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00163       }
00164 
00165     if((string)filename_dtd!="" && (string)filename_openspdtd!="")
00166       {
00167         strncpy(filename_ofx,tmp_filename,255);//The processed ofx file
00168         filenames[0]=filename_openspdtd;
00169         filenames[1]=filename_dtd;
00170         filenames[2]=filename_ofx;
00171         if(libofx_context->currentFileType()==OFX)
00172           {
00173             ofx_proc_sgml(libofx_context, 3,filenames);
00174           }
00175         else if(libofx_context->currentFileType()==OFC)
00176           {
00177             ofc_proc_sgml(libofx_context, 3,filenames);
00178           }
00179         else
00180           {
00181             message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00182           }
00183         if(remove(tmp_filename)!=0)
00184           {
00185             message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00186           }
00187       }
00188     else
00189       {
00190         message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00191       }
00192   }
00193   else{
00194     message_out(ERROR,"ofx_proc_file():No input file specified");
00195   }
00196   return 0;
00197 }
00198 
00199 
00200 
00201 CFCT int libofx_proc_buffer(LibofxContextPtr ctx,
00202                             const char *s, unsigned int size){
00203   ofstream tmp_file;
00204   string s_buffer;
00205   char *filenames[3];
00206   char tmp_filename[50];
00207   unsigned int pos;
00208   LibofxContext *libofx_context;
00209 
00210   libofx_context=(LibofxContext*)ctx;
00211 
00212   if (size==0) {
00213     message_out(ERROR,
00214                 "ofx_proc_file(): bad size");
00215     return -1;
00216   }
00217   s_buffer=string(s, size);
00218 
00219   strncpy(tmp_filename,"/tmp/libofxtmpXXXXXX",50);
00220   mkstemp(tmp_filename);
00221   tmp_file.open(tmp_filename);
00222 
00223   message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00224   if(!tmp_file){
00225     message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00226     return -1;
00227   }
00228 
00229   if (libofx_context->currentFileType()==OFX) {
00230     pos=s_buffer.find("<OFX>");
00231     if (pos==string::npos)
00232       pos=s_buffer.find("<ofx>");
00233   }
00234   else if (libofx_context->currentFileType()==OFC) {
00235     pos=s_buffer.find("<OFC>");
00236     if (pos==string::npos)
00237       pos=s_buffer.find("<ofc>");
00238   }
00239   else {
00240     message_out(ERROR,"ofx_proc(): unknown file type");
00241     return -1;
00242   }
00243   if (pos==string::npos) {
00244     message_out(ERROR,"ofx_proc():<OFX> has not been found");
00245     return -1;
00246   }
00247   else {
00248     // erase everything before the OFX tag
00249     s_buffer.erase(0, pos);
00250     message_out(DEBUG,"ofx_proc_file():<OF?> has been found");
00251   }
00252 
00253   if (libofx_context->currentFileType()==OFX) {
00254     pos=s_buffer.find("</OFX>");
00255     if (pos==string::npos)
00256       pos=s_buffer.find("</ofx>");
00257   }
00258   else if (libofx_context->currentFileType()==OFC) {
00259     pos=s_buffer.find("</OFC>");
00260     if (pos==string::npos)
00261       pos=s_buffer.find("</ofc>");
00262   }
00263   else {
00264     message_out(ERROR,"ofx_proc(): unknown file type");
00265     return -1;
00266   }
00267 
00268   if (pos==string::npos) {
00269     message_out(ERROR,"ofx_proc():</OF?> has not been found");
00270     return -1;
00271   }
00272   else {
00273     // erase everything after the /OFX tag
00274     s_buffer.erase(pos+6);
00275     message_out(DEBUG,"ofx_proc_file():<OFX> has been found");
00276   }
00277 
00278   s_buffer=sanitize_proprietary_tags(s_buffer);
00279   tmp_file.write(s_buffer.c_str(), s_buffer.length());
00280 
00281   tmp_file.close();
00282 
00283   char filename_openspdtd[255];
00284   char filename_dtd[255];
00285   char filename_ofx[255];
00286   strncpy(filename_openspdtd,find_dtd(OPENSPDCL_FILENAME).c_str(),255);//The opensp sgml dtd file
00287   if(libofx_context->currentFileType()==OFX){
00288     strncpy(filename_dtd,find_dtd(OFX160DTD_FILENAME).c_str(),255);//The ofx dtd file
00289   }
00290   else if(libofx_context->currentFileType()==OFC){
00291     strncpy(filename_dtd,find_dtd(OFCDTD_FILENAME).c_str(),255);//The ofc dtd file
00292   }
00293   else {
00294     message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00295   }
00296 
00297   if((string)filename_dtd!="" && (string)filename_openspdtd!=""){
00298     strncpy(filename_ofx,tmp_filename,255);//The processed ofx file
00299     filenames[0]=filename_openspdtd;
00300     filenames[1]=filename_dtd;
00301     filenames[2]=filename_ofx;
00302     if(libofx_context->currentFileType()==OFX){
00303       ofx_proc_sgml(libofx_context, 3,filenames);
00304     }
00305     else if(libofx_context->currentFileType()==OFC){
00306       ofc_proc_sgml(libofx_context, 3,filenames);
00307     }
00308     else {
00309       message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00310     }
00311     if(remove(tmp_filename)!=0){
00312       message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00313     }
00314   }
00315   else {
00316     message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00317   }
00318 
00319   return 0;
00320 }
00321 
00322 
00323 
00324 
00325 
00326 
00331 string sanitize_proprietary_tags(string input_string)
00332 {
00333   unsigned int i;
00334   size_t input_string_size;
00335   bool strip=false;
00336   bool tag_open=false;
00337   int tag_open_idx=0;//Are we within < > ?
00338   bool closing_tag_open=false;//Are we within </ > ?
00339   int orig_tag_open_idx=0;
00340   bool proprietary_tag=false; //Are we within a proprietary element?
00341   bool proprietary_closing_tag=false;
00342   int crop_end_idx=0;
00343   char buffer[READ_BUFFER_SIZE]="";
00344   char tagname[READ_BUFFER_SIZE]="";
00345   int tagname_idx=0;
00346   char close_tagname[READ_BUFFER_SIZE]="";
00347  
00348   for(i=0;i<READ_BUFFER_SIZE;i++){
00349     buffer[i]=0;
00350     tagname[i]=0;
00351     close_tagname[i]=0;
00352   }
00353   
00354   input_string_size=input_string.size();
00355   
00356   for(i=0;i<=input_string_size;i++){
00357     if(input_string.c_str()[i]=='<'){
00358       tag_open=true;
00359       tag_open_idx=i;
00360       if(proprietary_tag==true&&input_string.c_str()[i+1]=='/'){
00361         //We are now in a closing tag
00362         closing_tag_open=true;
00363         //cout<<"Comparaison: "<<tagname<<"|"<<&(input_string.c_str()[i+2])<<"|"<<strlen(tagname)<<endl;
00364         if(strncmp(tagname,&(input_string.c_str()[i+2]),strlen(tagname))!=0){
00365           //If it is the begining of an other tag
00366           //cout<<"DIFFERENT!"<<endl;
00367           crop_end_idx=i-1;
00368           strip=true;
00369         }
00370         else{
00371           //Otherwise, it is the start of the closing tag of the proprietary tag
00372           proprietary_closing_tag=true;
00373         }
00374       }
00375       else if(proprietary_tag==true){
00376         //It is the start of a new tag, following a proprietary tag
00377         crop_end_idx=i-1;
00378         strip=true;
00379       }
00380     }
00381     else if(input_string.c_str()[i]=='>'){
00382       tag_open=false;
00383       closing_tag_open=false;
00384       tagname[tagname_idx]=0;
00385       tagname_idx=0;
00386       if(proprietary_closing_tag==true){
00387         crop_end_idx=i;
00388         strip=true;
00389       }
00390     }
00391     else if(tag_open==true&&closing_tag_open==false){
00392       if(input_string.c_str()[i]=='.'){
00393         if(proprietary_tag!=true){
00394           orig_tag_open_idx = tag_open_idx;
00395           proprietary_tag=true;
00396         }
00397       }
00398       tagname[tagname_idx]=input_string.c_str()[i];
00399       tagname_idx++;
00400     }
00401     //cerr <<i<<endl;
00402     if(strip==true)
00403       {
00404         input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00405         message_out(INFO,"sanitize_proprietary_tags() (end tag or new tag) removed: "+string(buffer));
00406         input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00407         i=orig_tag_open_idx-1;
00408         proprietary_tag=false;
00409         proprietary_closing_tag=false;
00410         closing_tag_open=false;
00411         tag_open=false;
00412         strip=false;
00413       }
00414 
00415   }//end for
00416   if(proprietary_tag==true){
00417     if(crop_end_idx==0){//no closing tag
00418       crop_end_idx=input_string.size()-1;
00419     }
00420     input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00421     message_out(INFO,"sanitize_proprietary_tags() (end of line) removed: "+string(buffer));
00422     input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00423   }
00424   return input_string;
00425 }
00426 
00427 
00428 
00434 string find_dtd(string dtd_filename)
00435 {
00436   int i;
00437   ifstream dtd_file;
00438   string dtd_path_filename;
00439   bool dtd_found=false;
00440 
00441   for(i=0;i<DTD_SEARCH_PATH_NUM&&dtd_found==false;i++){
00442     dtd_path_filename=DTD_SEARCH_PATH[i];
00443     dtd_path_filename.append(dtd_filename);
00444     dtd_file.clear();
00445     dtd_file.open(dtd_path_filename.c_str());
00446     if(!dtd_file){
00447       message_out(DEBUG,"find_dtd():Unable to open the file "+dtd_path_filename);
00448     }
00449     else{
00450       message_out(STATUS,"find_dtd():DTD found: "+dtd_path_filename);
00451       dtd_file.close();
00452       dtd_found=true;
00453     }
00454   }
00455   if(dtd_found==false){
00456     message_out(ERROR,"find_dtd():Unable to find the DTD named " + dtd_filename);
00457     dtd_path_filename="";
00458   }
00459   return dtd_path_filename;
00460 }
00461 
00462 

Generated on Sun Jul 31 15:38:35 2005 for LibOFX by  doxygen 1.3.9.1