" Vim indent file " Language: DTD (Document Type Definition for XML) " Maintainer: Nikolai Weibull " Latest Revision: 2011-07-08 let s:cpo_save = &cpo set cpo&vim setlocal indentexpr=GetDTDIndent() setlocal indentkeys=!^F,o,O,> setlocal nosmartindent if exists("*GetDTDIndent") finish endif " TODO: Needs to be adjusted to stop at [, <, and ]. let s:token_pattern = '^[^[:space:]]\+' function s:lex1(input, start, ...) let pattern = a:0 > 0 ? a:1 : s:token_pattern let start = matchend(a:input, '^\_s*', a:start) if start == -1 return ["", a:start] endif let end = matchend(a:input, pattern, start) if end == -1 return ["", a:start] endif let token = strpart(a:input, start, end - start) return [token, end] endfunction function s:lex(input, start, ...) let pattern = a:0 > 0 ? a:1 : s:token_pattern let info = s:lex1(a:input, a:start, pattern) while info[0] == '--' let info = s:lex1(a:input, info[1], pattern) while info[0] != "" && info[0] != '--' let info = s:lex1(a:input, info[1], pattern) endwhile if info[0] == "" return info endif let info = s:lex1(a:input, info[1], pattern) endwhile return info endfunction function s:indent_to_innermost_parentheses(line, end) let token = '(' let end = a:end let parentheses = [end - 1] while token != "" let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=') if token[0] == '(' call add(parentheses, end - 1) elseif token[0] == ')' if len(parentheses) == 1 return [-1, end] endif call remove(parentheses, -1) endif endwhile return [parentheses[-1] - strridx(a:line, "\n", parentheses[-1]), end] endfunction " TODO: Line and end could be script global (think OO members). function GetDTDIndent() if v:lnum == 1 return 0 endif " Begin by searching back for a " return indent endif endwhile return -1 elseif declaration == 'ELEMENT' " Check for element name. If none exists, indent one level. let [name, end] = s:lex(line, end) if name == "" return indent + &sw endif " Check for token following element name. This can be a specification of " whether the start or end tag may be omitted. If nothing is found, indent " one level. let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)') let n = 0 while token =~ '[-O]' && n < 2 let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)') let n += 1 endwhile if token == "" return indent + &sw endif " Next comes the content model. If the token we’ve found isn’t a " parenthesis it must be either ANY, EMPTY or some random junk. Either " way, we’re done indenting this element, so set it to that of the first " line so that the terminating “>” winds up having the same indention. if token != '(' return indent endif " Now go through the content model. We need to keep track of the nesting " of parentheses. As soon as we hit 0 we’re done. If that happens we must " have a complete content model. Thus set indention to be the same as that " of the first line so that the terminating “>” winds up having the same " indention. Otherwise, we’ll indent to the innermost parentheses not yet " matched. let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end) if indent_of_innermost != -1 return indent_of_innermost endif " Finally, look for any additions and/or exceptions to the content model. " This is defined by a “+” or “-” followed by another content model " declaration. " TODO: Can the “-” be separated by whitespace from the “(”? let seen = { '+(': 0, '-(': 0 } while 1 let [additions_exceptions, end] = s:lex(line, end, '^[+-](') if additions_exceptions != '+(' && additions_exceptions != '-(' let [token, end] = s:lex(line, end) if token == '>' return indent endif " TODO: Should use s:lex here on getline(v:lnum) and check for >. return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + &sw) endif " If we’ve seen an addition or exception already and this is of the same " kind, the user is writing a broken DTD. Time to bail. if seen[additions_exceptions] return indent endif let seen[additions_exceptions] = 1 let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end) if indent_of_innermost != -1 return indent_of_innermost endif endwhile elseif declaration == 'ATTLIST' " Check for element name. If none exists, indent one level. let [name, end] = s:lex(line, end) if name == "" return indent + &sw endif " Check for any number of attributes. while 1 " Check for attribute name. If none exists, indent one level, unless the " current line is a lone “>”, in which case we indent to the same level " as the first line. Otherwise, if the attribute name is “>”, we have " actually hit the end of the attribute list, in which case we indent to " the same level as the first line. let [name, end] = s:lex(line, end) if name == "" " TODO: Should use s:lex here on getline(v:lnum) and check for >. return getline(v:lnum) =~ '^\s*>' ? indent : (indent + &sw) elseif name == ">" return indent endif " Check for attribute value declaration. If none exists, indent two " levels. Otherwise, if it’s an enumerated value, check for nested " parentheses and indent to the innermost one if we don’t reach the end " of the listc. Otherwise, just continue with looking for the default " attribute value. " TODO: Do validation of keywords " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)? let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)') if value == "" return indent + &sw * 2 elseif value == 'NOTATION' " If this is a enumerated value based on notations, read another token " for the actual value. If it doesn’t exist, indent three levels. " TODO: If validating according to above, value must be equal to '('. let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)') if value == "" return indent + &sw * 3 endif endif if value == '(' let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end) if indent_of_innermost != -1 return indent_of_innermost endif endif " Finally look for the attribute’s default value. If non exists, indent " two levels. let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)') if default == "" return indent + &sw * 2 elseif default == '#FIXED' " We need to look for the fixed value. If non exists, indent three " levels. let [default, end] = s:lex(line, end, '^"\_[^"]*"') if default == "" return indent + &sw * 3 endif endif endwhile elseif declaration == 'ENTITY' " Check for entity name. If none exists, indent one level. Otherwise, if " the name actually turns out to be a percent sign, “%”, this is a " parameter entity. Read another token to determine the entity name and, " again, if none exists, indent one level. let [name, end] = s:lex(line, end) if name == "" return indent + &sw elseif name == '%' let [name, end] = s:lex(line, end) if name == "" return indent + &sw endif endif " Now check for the entity value. If none exists, indent one level. If it " does exist, indent to same level as first line, as we’re now done with " this entity. " " The entity value can be a string in single or double quotes (no escapes " to worry about, as entities are used instead). However, it can also be " that this is an external unparsed entity. In that case we have to look " further for (possibly) a public ID and an URI followed by the NDATA " keyword and the actual notation name. For the public ID and URI, indent " two levels, if they don’t exist. If the NDATA keyword doesn’t exist, " indent one level. Otherwise, if the actual notation name doesn’t exist, " indent two level. If it does, indent to same level as first line, as " we’re now done with this entity. let [value, end] = s:lex(line, end) if value == "" return indent + &sw elseif value == 'SYSTEM' || value == 'PUBLIC' let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)') if quoted_string == "" return indent + &sw * 2 endif if value == 'PUBLIC' let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)') if quoted_string == "" return indent + &sw * 2 endif endif let [ndata, end] = s:lex(line, end) if ndata == "" return indent + &sw endif let [name, end] = s:lex(line, end) return name == "" ? (indent + &sw * 2) : indent else return indent endif elseif declaration == 'NOTATION' " Check for notation name. If none exists, indent one level. let [name, end] = s:lex(line, end) if name == "" return indent + &sw endif " Now check for the external ID. If none exists, indent one level. let [id, end] = s:lex(line, end) if id == "" return indent + &sw elseif id == 'SYSTEM' || id == 'PUBLIC' let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)') if quoted_string == "" return indent + &sw * 2 endif if id == 'PUBLIC' let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)') if quoted_string == "" " TODO: Should use s:lex here on getline(v:lnum) and check for >. return getline(v:lnum) =~ '^\s*>' ? indent : (indent + &sw * 2) elseif quoted_string == '>' return indent endif endif endif return indent endif " TODO: Processing directives could be indented I suppose. But perhaps it’s " just as well to let the user decide how to indent them (perhaps extending " this function to include proper support for whatever processing directive " language they want to use). " Conditional sections are simply passed along to let Vim decide what to do " (and hence the user). return -1 endfunction let &cpo = s:cpo_save unlet s:cpo_save