#!/usr/bin/python

import htmlentitydefs
import popen2
import re
import subprocess
import sys
import urllib

def convert_entity(m):
    """Return the replacement text for a matched HTML entity reference.

    m is a regular-expression match object whose full match is expected
    to look like ``&name;``. The name between the first and last
    character is looked up in htmlentitydefs.entitydefs; when it is not
    listed there, the matched text is returned unchanged.
    """
    entity = m.group(0)
    # entity[1:-1] drops the leading '&' and the trailing ';'.
    return htmlentitydefs.entitydefs.get(entity[1:-1], entity)

def load_and_apply_XSLT(url, xsltfile):
    """Download a page and return it transformed by an XSLT stylesheet.

    The body fetched from url is piped through
    ``xsltproc --nonet --html <xsltfile> -`` (its stderr is discarded)
    and then through ``xmllint --format -``. Whatever the pipeline writes
    to stdout is returned as a string.

    url -- address opened with urllib.urlopen.
    xsltfile -- path of the stylesheet. It is interpolated into a shell
        command line without quoting, so it must come from a trusted
        source.
    """
    response = urllib.urlopen(url)
    try:
        page = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    command = 'xsltproc --nonet --html %s - 2> /dev/null | xmllint --format -' % xsltfile
    pipeline = subprocess.Popen(command, shell=True,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    # communicate() writes stdin and drains stdout at the same time, so a
    # large page cannot dead-lock on a full pipe, and it waits for the
    # shell to exit so no zombie process is left behind.
    xml = pipeline.communicate(page)[0]
    return xml

def check_links(xml, url, pagename):
    """Check that every link found in xml has a title and a URL.

    The function counts ``<link ...>`` tags, non-empty ``<title>``
    elements and non-empty ``<url>`` elements in xml using regular
    expressions. If no link is found, or if the title or URL count
    differs from the link count, an error message naming pagename and
    url is printed and the process exits with status 1.

    Returns a tuple (links, urls): the list of matched ``<link ...>``
    tags and the list of matched ``<url>...</url>`` elements (tags
    included).
    """
    # A single-argument print(...) prints the same text as the bare
    # print statement under Python 2.
    link_tags = re.findall(r'<link[^>]*>', xml)
    if not link_tags:
        print('\nError: No links on %s page %s!' % (pagename, url))
        sys.exit(1)
    expected = len(link_tags)

    title_count = len(re.findall(r'<title>[^<]+</title>', xml))
    if title_count != expected:
        print('\nError: Some links do not have titles on %s page %s.' % (pagename, url))
        sys.exit(1)

    url_tags = re.findall(r'<url>[^<]+</url>', xml)
    if len(url_tags) != expected:
        print('\nError: Some links do not have urls on %s page %s.' % (pagename, url))
        sys.exit(1)

    return (link_tags, url_tags)

print 'Running unit test for YLE Areena service defintions.\n'

######

print 'Checking main page...',

mainurl = 'http://areena.yle.fi/selaa'
mainpagexml = load_and_apply_XSLT(mainurl, '../services/yleareena/programlist.xsl')
(links, urls) = check_links(mainpagexml, mainurl, 'main')

print 'OK'

######

naviurl = re.sub("&\w+;", convert_entity, urls[0][5:-6])

print "Checking navigation page %s..." % naviurl, 

navipage = urllib.urlopen(naviurl).read()
navipagexml = load_and_apply_XSLT(naviurl, '../services/yleareena/navigation.xsl')
(links, urls) = check_links(navipagexml, naviurl, 'navigation')

print 'OK'

######

print "Checking video pages...",

for i in xrange(len(links)):
    if 'type="video"' in links[i]:
        m = re.search('template="(\w+)"', links[i])
        if m is None:
            template = 'video'
        else:
            template = m.group(1)

        videourl = re.sub("&\w+;", convert_entity, urls[i][5:-6])
        videoxml = load_and_apply_XSLT(videourl, '../services/yleareena/%s.xsl' % template)

        if re.search(r'<title>[^<]+</title>', videoxml) is None:
            print '\nError: No title on video page %s.' % videourl
            sys.exit(1)
        m = re.search(r'<url>([^<]+)</url>', videoxml)
        if m is None:
            print '\nError: No URL on video page %s.' % videourl
            sys.exit(1)

        asxurl = m.group(1)
        asxpage = urllib.urlopen(asxurl).read()
        m = re.search(r'<REF HREF="([^"]+)">', asxpage)
        if m is None:
            print '\nWarning: Failed to parse ASX file (on page %s).' % videourl
            #sys.exit(1)

print 'OK'

######

print 'Checking search results page...',

searchurl = 'http://areena.yle.fi/hae?keyword=uutiset+nyt'
searchpagexml = load_and_apply_XSLT(searchurl, '../services/yleareena/navigation.xsl')
check_links(searchpagexml, searchurl, 'search results')

print 'OK'
print

