; rss.vnm
; test parsing an RSS feed with Protocol analyser
;
; Illustrates fetching a simple HTTP URL and use of the
; protocol analyser V2.
; For Venom 2
; 2010 11 09 : updated as URL of BBC news feed has changed
; Create the global objects used by main: the network interface, a TCP
; connection object, a protocol analyser reading from that connection, and
; a 100-character scratch string.
To init
; NOTE(review): presumably this brings up the Ethernet interface - confirm
New Ethernet
; tcp is the connection object that main opens, reads and closes
Make tcp TCProt
; pa parses data arriving on tcp; it must be made after tcp exists.
; NOTE(review): meaning of the second argument (0) is not visible here - confirm
Make pa ProtAnalyser(tcp, 0)
; s holds one header line or one title at a time (capacity 100)
Make s String(100)
End
To main
  ; Fetch the BBC News RSS feed over plain HTTP, echo the response headers,
  ; then print the title of every <item> in the feed.
  ;
  ; Note how the URL
  ;   "http://feeds.bbci.co.uk/news/rss.xml?edition=uk"
  ; is broken down:
  ;  - HTTP uses port 80, which we use when opening the connection.
  ;  - "feeds.bbci.co.uk" is the hostname, which we use when opening
  ;    the connection and again in a "host:" header after sending the GET request.
  ;  - "/news/rss.xml?edition=uk" is the path we specify in the GET request.
  ;
  If tcp.Open("feeds.bbci.co.uk", 80)
  [
    Print "http connection opened",CR
    ; send the request line, the host header and the blank line that ends the request
    Print To tcp, "GET /news/rss.xml?edition=uk HTTP/1.1",CR,
      "host: feeds.bbci.co.uk",CR, CR
    Print "=headers=",CR
    While tcp.Get(s) > 0 ; read headers until empty line = end of headers
      Print s, CR
    Print "==",CR
    pa.Reset
    ; every title we want to display is a <title> element inside an <item> element
    ; there are a couple of <title> elements at the beginning of the file that
    ; do not apply, so we always look for <item> first
    tcp.TimeOut := 5000
    While pa.Find("<item>", 0)
    [
      If pa.Find("<title>", 0)
      [
        ; the title text runs up to the '<' that starts the closing tag
        pa.Get(s, "<")
        convert_entity(s)
        Print "Title: ", s, CR
      ]
    ]
    tcp.Close
    tcp.Reset
  ]
  Else
    Print "tcp open failed, status ", tcp.Status:1, CR
  ; NOTE(review): the Else above is unbracketed, so it appears to cover only
  ; the Print; this Reset then runs on both the success and the failure path.
  tcp.Reset
End
; Table for converting certain XML/HTML entity codes back into characters.
; Entries come in pairs: the first entry in each pair is the entity name,
; i.e. the string that is preceded by '&' and followed by ';' in the
; XML/HTML; the second entry is the character it represents.
; e.g. "&amp;" represents '&'
; convert_entity matches a name in this table and then outputs the entry
; that immediately follows it.
; NOTE(review): the "quot" row has a comma between its two strings while the
; other rows do not - confirm the Array initialiser accepts both forms.
Array entities("", 8)
"amp" "&"
"apos" "'"
"pound" "£"
"quot", "\""
End
; Replace XML/HTML character entity codes (e.g. an '&' followed by a name
; and ';') in the string s with the characters they stand for.
; The result is written back into s; names are looked up in the
; file-level "entities" table.
To convert_entity(s)
  Local ch
  Local idx
  AutoDestruct
  Local outstr := New String(100) ; collects the converted text
  Local inpa := New ProtAnalyser(s, 1) ; reads characters out of s
  outstr.Empty
  s.Reset
  While s.Queue
  [
    ch := inpa.Get
    If ch = '&'
    [
      ; start of an entity: use the protanalyser "array of names" feature
      ; to match the text up to ';' against the entities table
      idx := inpa.Get(entities, ";")
      If idx <> -1 ; -1 is treated as "no match": nothing is output
        Print To outstr, entities.(idx + 1)
    ]
    Else
      outstr.Put(ch) ; ordinary character, copy it through
  ]
  ; copy the converted text back over the original string
  s.Empty
  s.Put(outstr)
End