| Class | REXML::Text |
| In: |
|
| Parent: | Child |
Represents text nodes in an XML document
Included Modules
Constants
| SPECIALS | = | [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ] | The order in which the substitutions occur | |
| SUBSTITUTES | = | ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;'] | ||
| SLAICEPS | = | [ '<', '>', '"', "'", '&' ] | Characters which are substituted in written strings | |
| SETUTITSBUS | = | [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ] | ||
| ILLEGAL | = | /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um | ||
| NUMERICENTITY | = | /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ | ||
| REFERENCE | = | /#{Entity::REFERENCE}/ | ||
| EREFERENCE | = | /&(?!#{Entity::NAME};)/ |
Attributes
| raw | [RW] | If raw is true, then REXML leaves the value alone |
Public Class methods
Constructor arg if a String, the content is set to the String. If a Text, the object is shallowly cloned.
respect_whitespace (boolean, false) if true, whitespace is respected
parent (nil) if this is a Parent object, the parent will be set to this.
raw (nil) This argument can be given three values. If true, then the value used to construct this object is expected to contain no unescaped XML markup, and REXML will not change the text. If this value is false, the string may contain any characters, and REXML will escape any and all defined entities whose values are contained in the text. If this value is nil (the default), then the raw value of the parent will be used as the raw value for this node. If there is no raw value for the parent, and no value is supplied, the default is false.
Text.new( "<&", false, nil, false ) #-> "&lt;&amp;" Text.new( "<&", false, nil, true ) #-> IllegalArgumentException Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;" # Assume that the entity "s" is defined to be "sean" # and that the entity "r" is defined to be "russell" Text.new( "sean russell" ) #-> "&s; &r;" Text.new( "sean russell", false, nil, true ) #-> "sean russell"
entity_filter (nil) This can be an array of entities to match in the supplied text. This argument is only useful if raw is set to false.
Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell" Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
In the last example, the entity_filter argument is ignored.
pattern INTERNAL USE ONLY
# Builds a text node.
#
# arg::                a String (its content is copied) or another Text
#                      (its +to_s+ value and +raw+ flag are taken over).
# respect_whitespace:: when false, runs of spaces, newlines and tabs in a
#                      String argument are squeezed to a single character.
# parent::             if given, it is passed to +super+ and its +raw+ flag
#                      becomes the default for this node.
# raw::                overrides the inherited raw flag unless it is nil.
# entity_filter::      stored and later handed to Text::normalize by +to_s+.
# illegal::            pattern a raw string must not match (internal use).
#
# Raises RuntimeError when +arg+ is neither a String nor a Text, or when a
# raw string matches +illegal+.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
               entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup, not clone: clone would keep a frozen argument frozen and the
    # in-place squeeze!/gsub! below would then raise.
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    # For a raw node to_s hands back that node's own string object, so the
    # two nodes may share it.
    @string = arg.to_s
    @raw = arg.raw
  else
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  # Normalize line endings to a single newline.
  @string.gsub!( /\r\n?/, "\n" )

  # check for illegal characters
  if @raw and @string =~ illegal
    raise "Illegal character '#{$1}' in raw string \"#{@string}\""
  end
end
Escapes all possible entities
# Escapes all possible entities in +input+ and returns the result as a new
# string; +input+ itself is not modified.
#
# input::         the text to escape.
# doctype::       when given, each of its entities that has a value is
#                 substituted back into the text as "&name;"; otherwise
#                 DocType::DEFAULT_ENTITIES is used.
# entity_filter:: entities contained in this collection are skipped
#                 (only consulted when a doctype is given).
#
# NOTE(review): the filter is tested against the entity objects here, while
# Text::unnormalize tests its filter against reference names -- confirm
# which form callers are expected to pass.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  # Escape every ampersand that does not already start an entity reference.
  copy = input.gsub( EREFERENCE, '&amp;' )
  if doctype
    doctype.entities.each_value do |entity|
      next unless entity.value
      next if entity_filter and entity_filter.include?(entity)
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  end
  copy
end
Reads text, substituting entities
# Reads text, substituting entities, and returns the result as a new string.
#
# input::   the escaped text.
# illegal:: optional pattern; if the text matches it a ParseException is
#           raised before any substitution takes place.
#
# Line endings are normalized to "\n". If the text contains an ampersand,
# the patterns in SETUTITSBUS are replaced by the matching SLAICEPS
# characters and numeric character references are replaced by the character
# they denote.
def Text::read_with_substitution( input, illegal=nil )
  # dup, not clone: clone keeps a frozen input frozen and gsub! would raise.
  copy = input.dup

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    # Same pattern Text::unnormalize uses; it accepts both lower- and
    # upper-case hexadecimal digits.
    copy.gsub!( NUMERICENTITY ) {
      digits = $1
      # "x41" -> "0x41" so that Integer() parses it as hexadecimal
      digits = "0#{digits}" if digits[0, 1] == 'x'
      [Integer(digits)].pack('U*')
    }
  end
  copy
end
Unescapes all possible entities
# Unescapes all possible entities in +string+ and returns the result as a
# new string; +string+ itself is not modified.
#
# string::  the escaped text.
# doctype:: when given, entity values are looked up with doctype.entity;
#           otherwise DocType::DEFAULT_ENTITIES is consulted.
# filter::  reference names contained in this collection are left untouched.
# illegal:: accepted for interface compatibility; not used by this method.
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # gsub (non-destructive) yields a fresh, unfrozen working copy.
  rv = string.gsub( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0

  rv.gsub!( NUMERICENTITY ) {
    digits = $1
    # "x41" -> "0x41" so that Integer() parses it as hexadecimal
    digits = "0#{digits}" if digits[0, 1] == 'x'
    [Integer(digits)].pack('U*')
  }

  # First capture of every match; nil captures are dropped.
  names = matches.collect { |m| m[0] }.compact.uniq
  unless names.empty?
    names.each do |name|
      next if filter and filter.include?(name)
      if doctype
        entity_value = doctype.entity( name )
      else
        entity = DocType::DEFAULT_ENTITIES[ name ]
        entity_value = entity && entity.value
      end
      # Block form: the value is inserted literally, so backslashes in it
      # are not interpreted as back-references.
      rv.gsub!( "&#{name};" ) { entity_value } if entity_value
    end
    # Escaped ampersands are restored last.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
Public Instance methods
Appends text to this text node. The text is appended in the raw mode of this text node.
# Appends +to_append+ to this node's string after converting "\r\n" and
# "\r" line endings to "\n". Returns the node's (mutated) string.
def <<( to_append )
  # The cached escaped/unescaped forms no longer match the content.
  @normalized = @unnormalized = nil
  @string << to_append.gsub( /\r\n?/, "\n" )
end
# Prefixes every line of +string+ with +style+ repeated +level+ times.
#
# string::          the text to indent.
# level::           number of repetitions of +style+; a negative level
#                   returns +string+ unchanged.
# style::           the indentation unit.
# indentfirstline:: when false, leading and trailing whitespace is stripped
#                   from the final result.
#
# Trailing whitespace of each line -- including its line terminator -- is
# removed, so the indented lines are concatenated without separators.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  indent_string = style * level
  # each_line: String no longer provides #each.
  new_string = string.each_line.map { |line|
    (indent_string + line).sub(/[\s]+$/,'')
  }.join
  new_string.strip! unless indentfirstline
  return new_string
end
Returns the string value of this text node. This string is always escaped, meaning that it is a valid XML text node string, and all entities that can be escaped, have been inserted. This method respects the entity filter set in the constructor.
# Assume that the entity "s" is defined to be "sean", and that the # entity "r" is defined to be "russell" t = Text.new( "< & sean russell", false, nil, false, ['s'] ) t.to_s #-> "&lt; &amp; &s; russell" t = Text.new( "< & &s; russell", false, nil, false ) t.to_s #-> "&lt; &amp; &s; russell" u = Text.new( "sean russell", false, nil, true ) u.to_s #-> "sean russell"
# Returns the string form of this node: the stored string itself for a raw
# node, otherwise the result of Text::normalize, which is computed once and
# then cached.
def to_s
  if @raw
    @string
  else
    @normalized ||= begin
      # The doctype is only reachable through a parent that has a document.
      document = @parent ? @parent.document : nil
      doctype = document ? document.doctype : nil
      Text::normalize( @string, doctype, @entity_filter )
    end
  end
end
Returns the string value of this text. This is the text without entities, as it might be used programmatically, or printed to the console. This ignores the ‘raw’ attribute setting, and any entity_filter.
# Assume that the entity "s" is defined to be "sean", and that the # entity "r" is defined to be "russell" t = Text.new( "< & sean russell", false, nil, false, ['s'] ) t.string #-> "< & sean russell" t = Text.new( "< & &s; russell", false, nil, false ) t.string #-> "< & sean russell" u = Text.new( "sean russell", false, nil, true ) u.string #-> "sean russell"
# Returns the unescaped string value of this node. The result of
# Text::unnormalize is cached and reused on later calls.
def value
  # Without the explicit return the cached value was computed but ignored.
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end
Sets the contents of this text node. This expects the text to be unnormalized. It returns self.
e = Element.new( "a" ) e.add_text( "foo" ) # <a>foo</a> e[0].value = "bar" # <a>bar</a> e[0].value = "<a>" # <a>&lt;a&gt;</a>
# Replaces the content of this node with +val+, converting "\r\n" and "\r"
# line endings to "\n". Both cached string forms are discarded and the node
# is switched to non-raw mode.
def value=( val )
  @string = val.gsub( /\r\n?/, "\n" )
  @normalized = @unnormalized = nil
  @raw = false
end
# Recursively wraps +string+ so that lines break at a space at or before
# column +width+.
#
# string::     the text to wrap.
# width::      maximum line length aimed for.
# addnewline:: when true, a newline is also put in front of the result.
#
# A string no longer than +width+ is returned unchanged. A leading word
# longer than +width+ cannot be split: the break is then made at the first
# space after it, and if there is no space at all the string is returned
# as is.
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Last ' ' at or before the cutoff, else the first one after it.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string if place.nil?
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
# Appends the string form of this node (+to_s+) to +writer+ using <<.
#
# writer::     any object supporting <<( String ).
# indent::     indentation level; only used when it is positive, the parent
#              context defines :indentstyle and the text spans several lines.
# transitive:: not used by this method.
# ie_hack::    not used by this method.
#
# When the node has a parent whose +whitespace+ setting is false, the text
# is wrapped at 60 columns if the parent context's :wordwrap is :all,
# optionally indented, and runs of spaces, newlines and tabs are squeezed.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  s = to_s()
  if @parent and not @parent.whitespace
    context = @parent.context
    s = wrap(s, 60, false) if context[:wordwrap] == :all
    indent_style = context[:indentstyle]
    if not indent_style.nil? and indent > 0 and s.count("\n") > 0
      s = indent_text(s, indent, indent_style, false)
    end
    # Non-destructive squeeze: s may be the very object held in @string or
    # @normalized, and writing must not alter the node's content.
    s = s.squeeze(" \n\t")
  end
  writer << s
end
Writes out text, substituting special characters beforehand. out A String, IO, or any other object supporting <<( String ) input the text to substitute and then write out
# Example: print a string with every code point of 0x100 and above written
# as a hexadecimal character reference.
# NOTE(review): +utf8+ is not defined in this snippet -- presumably a UTF-8
# encoded String supplied by the caller; confirm.
# Decode the string into an array of integer code points.
z=utf8.unpack("U*")
ascOut=""
z.each{|r|
if r < 0x100
# Values below 0x100 are appended as the single character r.chr.
ascOut.concat(r.chr)
else
# Everything else is appended as "&#x<hex>;".
ascOut.concat(sprintf("&#x%x;", r))
end
}
puts ascOut
# Writes +input+ to +out+ after replacing each SPECIALS pattern with the
# SUBSTITUTES entry at the same index. +input+ itself is left unchanged.
#
# out::   a String, IO, or any other object supporting <<( String ).
# input:: the text to substitute and then write out.
#
# Returns the result of out << (escaped text).
def write_with_substitution(out, input)
  # dup, not clone: clone keeps a frozen input frozen and gsub! would raise.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end