RTF import tokenizer: read all input through one bounds-checked helper.

What this patch does:
  * tokenText is resized to 4113 bytes instead of 4112.
  * type is set to PlainText in the routine that assigns infile.
  * A private helper, RTFTokenizer::nextChar(), is added.  It refills
    fileBuffer from infile whenever fileBufferPtr has reached fileBufferEnd
    and returns the next byte, or -1 when readBlock() delivers no data.
  * next() calls nextChar() instead of repeating the refill code inline.
  * The loop that copies a control word name stops three bytes before the
    end of tokenText.
  * In the \'hh branch the byte returned by nextChar() is the one that is
    decoded, and the look-ahead byte is pushed back only if one was read.
  * In the plain-text branch the loop never dereferences fileBufferPtr at
    fileBufferEnd, and the delimiter that ends the text is pushed back even
    when it was the last byte in the buffer.

NOTE(review): indentation and blank-line placement of the context lines were
reconstructed and may not match the repository byte for byte; if a hunk is
rejected, try "patch -l" or adjust the context by hand.

Index: filters/kword/rtf/import/rtfimport_tokenizer.cpp
===================================================================
--- filters/kword/rtf/import/rtfimport_tokenizer.cpp	(revision 467594)
+++ filters/kword/rtf/import/rtfimport_tokenizer.cpp	(working copy)
@@ -14,7 +14,7 @@
 
 RTFTokenizer::RTFTokenizer()
 {
-    tokenText.resize( 4112 );
+    tokenText.resize( 4113 );
     fileBuffer.resize( 4096 );
     infile = 0L;
 }
@@ -28,8 +28,29 @@
     fileBufferPtr = 0L;
     fileBufferEnd = 0L;
     infile = in;
+    type = RTFTokenizer::PlainText;
 }
 
+/**
+ * Returns the next byte of the input, refilling fileBuffer from infile
+ * when it is used up. Returns -1 when readBlock() delivers no data.
+ */
+int RTFTokenizer::nextChar()
+{
+    if ( fileBufferPtr == fileBufferEnd ) {
+        int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+        fileBufferPtr = ( uchar* ) fileBuffer.data();
+        fileBufferEnd = fileBufferPtr;
+
+        if ( n <= 0 )
+            return -1;
+
+        fileBufferEnd = fileBufferPtr + n;
+    }
+    return *fileBufferPtr++;
+}
+
+
 /**
  * Reads the next token.
  */
@@ -40,22 +61,16 @@
     if (!infile)
         return;
 
-    do
-    {
-        if (fileBufferPtr == fileBufferEnd)
-        {
-            int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+    do {
+        int n = nextChar();
 
-            if (n <= 0)
-            {
-                // Return CloseGroup on EOF
-                ch = '}';
-                break;
-            }
-            fileBufferPtr = (uchar *)fileBuffer.data();
-            fileBufferEnd = (fileBufferPtr + n);
-        }
-        ch = *fileBufferPtr++;
+        if ( n <= 0 ) {
+            // Return CloseGroup on EOF
+            ch = '}';
+            break;
+        }
+
+        ch = n;
     }
     while (ch == '\n' || ch == '\r' && ch != 0);
 
@@ -65,6 +80,7 @@
 
     uchar *_text = (uchar *)text;
 
+
     if (ch == '{')
         type = RTFTokenizer::OpenGroup;
     else if (ch == '}')
@@ -73,20 +89,14 @@
     {
         type = RTFTokenizer::ControlWord;
 
-        if (fileBufferPtr == fileBufferEnd)
-        {
-            int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+        int n = nextChar();
 
-            if (n <= 0)
-            {
-                // Return CloseGroup on EOF
-                type = RTFTokenizer::CloseGroup;
-                return;
-            }
-            fileBufferPtr = (uchar *)fileBuffer.data();
-            fileBufferEnd = (fileBufferPtr + n);
-        }
-        ch = *fileBufferPtr++;
+        if ( n <= 0 ) {
+            // Return CloseGroup on EOF
+            type = RTFTokenizer::CloseGroup;
+            return;
+        }
+        ch = n;
 
         // Type is either control word or control symbol
         if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
@@ -94,64 +104,42 @@
             int v = 0;
 
             // Read alphabetic string (command)
-            while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
+            while (_text < ( uchar* )tokenText.data()+tokenText.size()-3 &&
+                   ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) )
             {
                 *_text++ = ch;
 
-                if (fileBufferPtr == fileBufferEnd)
-                {
-                    int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
-
-                    if (n <= 0)
-                    {
-                        ch = ' ';
-                        break;
-                    }
-                    fileBufferPtr = (uchar *)fileBuffer.data();
-                    fileBufferEnd = (fileBufferPtr + n);
-                }
-                ch = *fileBufferPtr++;
+                int n = nextChar();
+                if ( n <= 0 ) {
+                    ch = ' ';
+                    break;
+                }
+                ch = n;
             }
 
             // Read numeric parameter (param)
             bool isneg = (ch == '-');
 
-            if (isneg)
-            {
-                if (fileBufferPtr == fileBufferEnd)
-                {
-                    int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
-
-                    if (n <= 0)
-                    {
-                        // Return CloseGroup on EOF
-                        type = RTFTokenizer::CloseGroup;
-                        return;
-                    }
-                    fileBufferPtr = (uchar *)fileBuffer.data();
-                    fileBufferEnd = (fileBufferPtr + n);
-                }
-                ch = *fileBufferPtr++;
+            if (isneg) {
+                int n = nextChar();
+                if ( n <= 0 ) {
+                    // Return CloseGroup on EOF
+                    type = RTFTokenizer::CloseGroup;
+                    return;
+                }
+                ch = n;
             }
-            while (ch >= '0' && ch <= '9')
-            {
+
+            while (ch >= '0' && ch <= '9') {
                 v = (10 * v) + ch - '0';
                 hasParam = true;
 
-                if (fileBufferPtr == fileBufferEnd)
-                {
-                    int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+                int n = nextChar();
 
-                    if (n <= 0)
-                    {
-                        ch = ' ';
-                        break;
-                    }
-                    fileBufferPtr = (uchar *)fileBuffer.data();
-                    fileBufferEnd = (fileBufferPtr + n);
-                }
-                ch = *fileBufferPtr++;
-            }
+                if ( n <= 0 )
+                    n = ' ';
+                ch = n;
+            }
             value = isneg ? -v : v;
 
             // If delimiter is a space, it's part of the control word
@@ -164,19 +152,15 @@
         {
             type = RTFTokenizer::ControlWord;
             *_text++ = ch;
-            if (fileBufferPtr == fileBufferEnd)
-            {
-                int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
 
-                if (n <= 0)
-                {
-                    // Return CloseGroup on EOF
-                    type = RTFTokenizer::CloseGroup;
-                    return;
-                }
-                fileBufferPtr = (uchar *)fileBuffer.data();
-                fileBufferEnd = (fileBufferPtr + n);
-            }
-            ch = *fileBufferPtr++;
+            int n = nextChar();
+
+            if ( n <= 0 ) {
+                // Return CloseGroup on EOF
+                type = RTFTokenizer::CloseGroup;
+                return;
+            }
+            // nextChar() has already consumed the first hex digit: decode it
+            ch = n;
             for(int i=0;i<2;i++)
             {
@@ -184,22 +168,20 @@
                 value<<=4;
                 value=value|((ch + ((ch & 16) ? 0 : 9)) & 0xf);
 
-                if (fileBufferPtr == fileBufferEnd)
-                {
-                    int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+                // Assign the n declared before the loop so it can be tested below
+                n = nextChar();
 
-                    if (n <= 0)
-                    {
-                        ch = ' ';
-                        break;
-                    }
-                    fileBufferPtr = (uchar *)fileBuffer.data();
-                    fileBufferEnd = (fileBufferPtr + n);
-                }
-                ch = *fileBufferPtr++;
+                if ( n <= 0 ) {
+                    ch = ' ';
+                    break;
+                }
+                ch = n;
             }
-            --fileBufferPtr;
-        }
+            // Give the look-ahead byte back, but only if one was read: after
+            // a failed read fileBufferPtr sits at the start of fileBuffer
+            if ( n > 0 )
+                --fileBufferPtr;
+        }
         else
         {
             type = RTFTokenizer::ControlWord;
@@ -212,14 +194,22 @@
 
+        // Cleared when the loop below stops because fileBuffer ran dry; in
+        // that case ch has already been stored and must not be read again
+        bool giveBack = true;
+
         // Everything until next backslash, opener or closer
         while ( ch != '\\' && ch != '{' && ch != '}' && ch != '\n' &&
-                ch != '\r' && fileBufferPtr <= fileBufferEnd )
+                ch != '\r')
         {
             *_text++ = ch;
+            if(fileBufferPtr >= fileBufferEnd) {
+                giveBack = false;
+                break;
+            }
             ch = *fileBufferPtr++;
         }
-
-        // Give back last char
-        --fileBufferPtr;
+        if(giveBack)
+            --fileBufferPtr; // give back the delimiter that ended the text
     }
     *_text++ = 0;
+
 }
Index: filters/kword/rtf/import/rtfimport_tokenizer.h
===================================================================
--- filters/kword/rtf/import/rtfimport_tokenizer.h	(revision 467594)
+++ filters/kword/rtf/import/rtfimport_tokenizer.h	(working copy)
@@ -35,6 +35,9 @@
 
 // tokenizer (private) data
 private:
+    // Next input byte, or -1 when no more data can be read
+    int nextChar();
+
     QFile *infile;
     QByteArray fileBuffer;
     QCString tokenText;