Class REXML::Text
In:
Parent: Child

Represents text nodes in an XML document

Methods

Included Modules

Comparable

Constants

SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]   The order in which the substitutions occur
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
SLAICEPS = [ '<', '>', '"', "'", '&' ]   Characters which are substituted in written strings
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
REFERENCE = /#{Entity::REFERENCE}/
EREFERENCE = /&(?!#{Entity::NAME};)/

Attributes

raw  [RW]  If raw is true, then REXML leaves the value alone

Public Class methods

Constructor arg if a String, the content is set to the String. If a Text, the object is shallowly cloned.

respect_whitespace (boolean, false) if true, whitespace is respected

parent (nil) if this is a Parent object, the parent will be set to this.

raw (nil) This argument can be given three values. If true, then the value used to construct this object is expected to contain no unescaped XML markup, and REXML will not change the text. If this value is false, the string may contain any characters, and REXML will escape any and all defined entities whose values are contained in the text. If this value is nil (the default), then the raw value of the parent will be used as the raw value for this node. If there is no raw value for the parent, and no value is supplied, the default is false.

  Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
  Text.new( "<&", false, nil, true )  #-> IllegalArgumentException
  Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
  # Assume that the entity "s" is defined to be "sean"
  # and that the entity    "r" is defined to be "russell"
  Text.new( "sean russell" )          #-> "&s; &r;"
  Text.new( "sean russell", false, nil, true ) #-> "sean russell"

entity_filter (nil) This can be an array of entities to match in the supplied text. This argument is only useful if raw is set to false.

  Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
  Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"

In the last example, the entity_filter argument is ignored.

pattern INTERNAL USE ONLY

[Source]

# Builds a text node from +arg+.
#
# arg:: a String (its clone is stored) or another Text (its +to_s+ value
#       and its +raw+ flag are taken over, overriding the +raw+ argument).
# respect_whitespace:: unless true, repeated spaces, newlines and tabs in a
#                      String +arg+ are squeezed.
# parent:: when given, it is handed to +super+ and its +raw+ value becomes
#          the default for this node.
# raw:: replaces the inherited raw flag unless it is nil.
# entity_filter:: stored as-is in <tt>@entity_filter</tt>.
# illegal:: pattern that a raw string must not match.
#
# Raises RuntimeError when +arg+ is neither a String nor a Text, or when the
# node is raw and its string matches +illegal+.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    @string = arg.clone
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    @string = arg.to_s
    @raw = arg.raw
  else
    # Object#class works on every Ruby version; Object#type is gone after 1.8.
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  # Normalize line endings to a single "\n".
  @string.gsub!( /\r\n?/, "\n" )

  # check for illegal characters
  if @raw and @string =~ illegal
    raise "Illegal character '#{$1}' in raw string \"#{@string}\""
  end
end

Escapes all possible entities

[Source]

# Escapes +input+ and returns the result as a new string.
#
# Every "&" matched by EREFERENCE is rewritten to "&amp;". Afterwards each
# occurrence of an entity's value is replaced by its "&name;" reference.
#
# input:: the text to escape; it is not modified.
# doctype:: when given, its +entities+ are used; otherwise
#           DocType::DEFAULT_ENTITIES are used.
# entity_filter:: optional list of entity names; entities whose name is
#                 listed are skipped. Only consulted when +doctype+ is given.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  copy = input.gsub( EREFERENCE, '&amp;' )
  if doctype
    doctype.entities.each_value do |entity|
      next unless entity.value
      # The filter holds entity names (as in Text::unnormalize), so compare
      # by name rather than against the entity object itself.
      next if entity_filter and entity_filter.include?( entity.name )
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  end
  copy
end

Reads text, substituting entities

[Source]

# Returns a copy of +input+ with line endings normalized to "\n" and the
# five predefined entity references plus numeric character references
# replaced by the characters they stand for.
#
# input:: the text to read; it is not modified.
# illegal:: optional pattern; when it matches, a ParseException is raised.
#
# NOTE(review): "&amp;" is replaced before numeric references are expanded,
# so input such as "&amp;#60;" ends up decoded twice -- confirm whether
# callers rely on that.
def Text::read_with_substitution( input, illegal=nil )
  copy = input.clone

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? '&'
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    # NUMERICENTITY accepts upper- and lower-case hex digits, matching
    # what Text::unnormalize decodes.
    copy.gsub!( NUMERICENTITY ) {
      digits = $1
      # Integer() needs a "0x" prefix to read hexadecimal.
      digits = "0#{digits}" if digits[0, 1] == 'x'
      [Integer(digits)].pack('U*')
    }
  end
  copy
end

Unescapes all possible entities

[Source]

# Returns a copy of +string+ with line endings normalized and entity
# references expanded.
#
# Numeric character references are decoded first, then each named reference
# found by REFERENCE is replaced by its value, and finally any remaining
# "&amp;" becomes "&".
#
# string:: the escaped text; it is not modified.
# doctype:: when given, names are resolved through <tt>doctype.entity</tt>;
#           otherwise DocType::DEFAULT_ENTITIES is consulted.
# filter:: optional list of entity names that are left unexpanded.
# illegal:: accepted but not used by this method.
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  text = string.clone
  text.gsub!( /\r\n?/, "\n" )

  references = text.scan( REFERENCE )
  return text if references.size == 0

  text.gsub!( NUMERICENTITY ) {
    digits = $1
    digits = "0#{digits}" if digits[0] == ?x
    [Integer(digits)].pack('U*')
  }

  # Keep only the first capture of each match and drop the nil entries.
  names = references.collect { |groups| groups[0] }.compact
  return text unless names.size > 0

  if doctype
    names.each do |name|
      next if filter and filter.include?(name)
      replacement = doctype.entity( name )
      pattern = /&#{name};/
      text.gsub!( pattern, replacement ) if replacement
    end
  else
    names.each do |name|
      next if filter and filter.include?(name)
      default = DocType::DEFAULT_ENTITIES[ name ]
      pattern = /&#{name};/
      text.gsub!( pattern, default.value ) if default
    end
  end
  text.gsub!( /&amp;/, '&' )
  text
end

Public Instance methods

Appends text to this text node. The text is appended in the raw mode of this text node.

[Source]

# Appends +to_append+ to this node's text after converting "\r\n" and "\r"
# line endings to "\n".
#
# The cached normalized and unnormalized forms are discarded so that the
# next call to #to_s or #value reflects the appended text.
#
# Returns the node's internal string, as before.
def <<( to_append )
  @string << to_append.gsub( /\r\n?/, "\n" )
  @normalized = @unnormalized = nil
  @string
end

other a String or a Text returns the result of (to_s <=> other.to_s)

[Source]

# Orders this node against +other+ by comparing the two +to_s+ values.
def <=>( other )
  mine = to_s
  theirs = other.to_s
  mine <=> theirs
end

[Source]

# Returns a new Text built from this node via the Text constructor.
def clone
  Text.new( self )
end

[Source]

# True when the stored string has no characters.
def empty?
  @string.size.zero?
end

[Source]

# Prefixes every line of +string+ with +style+ repeated +level+ times.
#
# string:: the text to indent.
# level:: number of repetitions of +style+; a negative level returns
#         +string+ unchanged.
# style:: the indentation unit (a tab by default).
# indentfirstline:: when false, the result is stripped of leading and
#                   trailing whitespace, which removes the first line's
#                   indentation.
#
# NOTE(review): trailing whitespace, including the line terminator, is
# removed from each line before the pieces are concatenated, so the result
# contains no newlines. Kept as-is; confirm this is intended.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  indent_string = style * level
  pieces = []
  # String#each is not available after Ruby 1.8; each_line works everywhere.
  string.each_line { |line|
    pieces << (indent_string + line).sub(/[\s]+$/,'')
  }
  new_string = pieces.join
  new_string.strip! unless indentfirstline
  new_string
end

[Source]

# Returns the +inspect+ form of the stored string.
def inspect
  text = @string
  text.inspect
end

[Source]

# Identifies this node kind; always the symbol :text.
def node_type
  :text
end

Returns the string value of this text node. This string is always escaped, meaning that it is a valid XML text node string, and all entities that can be escaped, have been inserted. This method respects the entity filter set in the constructor.

  # Assume that the entity "s" is defined to be "sean", and that the
  # entity "r" is defined to be "russell"
  t = Text.new( "< & sean russell", false, nil, false, ['s'] )
  t.to_s   #-> "&lt; &amp; &s; russell"
  t = Text.new( "< & &s; russell", false, nil, false )
  t.to_s   #-> "&lt; &amp; &s; russell"
  u = Text.new( "sean russell", false, nil, true )
  u.to_s   #-> "sean russell"

[Source]

# Returns the escaped string value of this node.
#
# A raw node returns its stored string untouched. Otherwise the result of
# Text::normalize is computed once (using the doctype of the parent's
# document, when there is one) and cached in <tt>@normalized</tt>.
def to_s
  return @string if @raw

  @normalized ||= begin
    document = @parent ? @parent.document : nil
    doctype = document ? document.doctype : nil
    Text::normalize( @string, doctype, @entity_filter )
  end
end

Returns the string value of this text. This is the text without entities, as it might be used programmatically, or printed to the console. This ignores the ‘raw’ attribute setting, and any entity_filter.

  # Assume that the entity "s" is defined to be "sean", and that the
  # entity "r" is defined to be "russell"
  t = Text.new( "< & sean russell", false, nil, false, ['s'] )
  t.string   #-> "< & sean russell"
  t = Text.new( "< & &s; russell", false, nil, false )
  t.string   #-> "< & sean russell"
  u = Text.new( "sean russell", false, nil, true )
  u.string   #-> "sean russell"

[Source]

# Returns the unescaped string value of this node.
#
# The result of Text::unnormalize is computed once (using the doctype of
# the parent's document, when there is one) and cached in
# <tt>@unnormalized</tt>; later calls return the cached string.
def value
  # Without the explicit return the cached value was computed but ignored.
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end

Sets the contents of this text node. This expects the text to be unnormalized. It returns self.

  e = Element.new( "a" )
  e.add_text( "foo" )   # <a>foo</a>
  e[0].value = "bar"    # <a>bar</a>
  e[0].value = "<a>"    # <a>&lt;a&gt;</a>

[Source]

# Replaces the node's text with +val+ after converting "\r\n" and "\r"
# line endings to "\n". Both cached forms are dropped and the node is
# switched out of raw mode.
def value=( val )
  unix_text = val.gsub( /\r\n?/, "\n" )
  @string = unix_text
  @normalized = @unnormalized = nil
  @raw = false
end

[Source]

# Breaks +string+ into lines by replacing spaces with newlines.
#
# Each break is made at the last space at or before position +width+. When
# a stretch has no such space, the break is made at the next space after
# it instead; when there is no space left at all, the rest is kept whole.
#
# string:: the text to wrap; returned unchanged when it already fits or
#          cannot be broken anywhere.
# width:: the position at or before which a break is preferred.
# addnewline:: when true, a wrapped result is prefixed with a newline.
def wrap(string, width, addnewline=false)
  return string if string.length <= width

  lines = []
  rest = string
  while rest.length > width
    # rindex is nil when no space occurs at or before +width+; fall back
    # to the first space after it rather than failing.
    place = rest.rindex(' ', width) || rest.index(' ', width)
    break unless place
    lines << rest[0, place]
    rest = rest[(place + 1)..-1]
  end
  return string if lines.empty?

  lines << rest
  wrapped = lines.join("\n")
  addnewline ? "\n" + wrapped : wrapped
end

[Source]

# Appends this node's escaped text (#to_s) to +writer+ using <tt><<</tt>.
#
# When the node has a parent whose +whitespace+ is falsy, the text is
# first word-wrapped at 60 if the parent's context has
# <tt>:wordwrap == :all</tt>, indented if <tt>:indentstyle</tt> is set,
# +indent+ is positive and the text spans several lines, and finally has
# repeated spaces, newlines and tabs squeezed.
#
# writer:: any object supporting <tt><<( String )</tt>.
# indent:: indentation level handed to #indent_text.
# transitive:: not used by this method.
# ie_hack:: not used by this method.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  s = to_s()
  if @parent && !@parent.whitespace
    s = wrap(s, 60, false) if @parent.context[:wordwrap] == :all
    if !@parent.context[:indentstyle].nil? && indent > 0 && s.count("\n") > 0
      s = indent_text(s, indent, @parent.context[:indentstyle], false)
    end
    # Non-destructive squeeze: +s+ may be the very string held by this
    # node (or its cache), which must not change as a side effect of writing.
    s = s.squeeze(" \n\t")
  end
  writer << s
end

Writes out text, substituting special characters beforehand. out A String, IO, or any other object supporting <<( String ) input the text to substitute and then write out

  z=utf8.unpack("U*")
  ascOut=""
  z.each{|r|
    if r <  0x100
      ascOut.concat(r.chr)
    else
      ascOut.concat(sprintf("&#x%x;", r))
    end
  }
  puts ascOut

[Source]

# Appends an escaped copy of +input+ to +out+.
#
# Each pattern in SPECIALS is replaced, in array order, by the entry at the
# same index in SUBSTITUTES. +input+ itself is left untouched.
#
# out:: any object supporting <tt><<( String )</tt>.
# input:: the text to escape.
def write_with_substitution(out, input)
  escaped = input.clone
  SPECIALS.each_index do |i|
    escaped.gsub!( SPECIALS[i], SUBSTITUTES[i] )
  end
  out << escaped
end

FIXME This probably won’t work properly

[Source]

# Returns the parent's xpath with "/text()" appended.
# The node must have a parent; none is checked for here.
def xpath
  @parent.xpath + "/text()"
end

Search

Google

Ruby API Docs

Links