#!/usr/bin/python
# coding=utf-8
#
"""Usage: lodo2ledger [--help] <year> [<year> ...]

Extract accounting transactions from LODO in the format of ledger
and hledger.

"""
import urllib2
import urllib
from cookielib import CookieJar
import urlparse
import lxml.html
import getpass
import time
import re
import lxml.etree as etree
import getopt

# Login settings handed to lodo_login() by main().
# NOTE(review): the user name and database name are hard-coded here; edit
# them to use another account.
username = "pere@hungry.com"
# Prompts for the password interactively when this module is loaded.
password = getpass.getpass()
dbname   = "NORWEGIAN_UNIX_USER_GROUP"

# make sure stdout is UTF-8, also when redirecting to pipe or file
import sys
import codecs
# Replace sys.stdout by a writer that encodes everything written to it as UTF-8.
sys.stdout = codecs.getwriter('utf8')(sys.stdout)

# Maps the one-letter marker found in a trailing "(X)" of an account label
# to the top-level ledger account the entry is filed under.
# ('ansatt' is Norwegian for 'employee'.)
agroups = dict(
    b='balance',
    c='customer',
    s='supplier',
    r='result',
    e='ansatt',
)

def ledger_entry(date, type, id, descriptions, entries):
    """Print one transaction in ledger journal syntax on stdout.

    The header line is "<date> <type><id> <first description>".  It is
    followed by one indented posting line per entry, ordered by sorting
    the entries, and a blank line that ends the transaction.

    date         -- transaction date, printed as given
    type, id     -- voucher type and journal id, printed back to back
    descriptions -- sequence of texts; only the first one is printed
    entries      -- iterable of (account, credit, debit) tuples
    """
    print("%s %s%s %s" % (date, type, id, descriptions[0]))
    for account, credit, debit in sorted(entries):
        # A zero credit means the debit amount is printed as is; any other
        # credit is printed negated (the debit is then not looked at).
        amount = -credit if credit != 0 else debit
        print("  %-50s %10.2f" % (account, amount))
    print("")

# An account label ends in a one-letter group marker: "<name> (X)".
_ACCOUNT_LABEL_RE = re.compile(r"^(.*) \((.)\)$")


def _ledger_account(label):
    """Convert an account label from the voucher form to a ledger account.

    Runs of spaces are collapsed to a single space and the trailing "(X)"
    marker is replaced by the matching top-level name from ``agroups``,
    giving "<group>:<name>".

    Raises ValueError when the label has no "(X)" suffix or when X is not
    a key of ``agroups``.
    """
    # ledger reads two consecutive spaces as the end of the account name,
    # so every run of spaces (not only exact pairs) must be collapsed.
    label = re.sub(r" {2,}", " ", label)
    match = _ACCOUNT_LABEL_RE.match(label)
    if match is None:
        raise ValueError("account label without group marker: %r" % (label,))
    name, marker = match.group(1), match.group(2)
    if marker not in agroups:
        raise ValueError("unknown account group %r in label %r"
                         % (marker, label))
    return "%s:%s" % (agroups[marker], name)


def _parse_amount(text):
    """Convert an amount field such as "1 234,50" to a float.

    Spaces are removed and a decimal comma becomes a decimal point.  A
    missing (None) or blank value counts as 0.0.
    """
    cleaned = (text or "").replace(",", ".").replace(" ", "")
    if not cleaned:
        return 0.0
    return float(cleaned)


def parse_voucher_url(opener, url):
    """Fetch one voucher page and print it as a ledger transaction.

    opener -- object whose open(url) returns a response with read() and
              close(); main() passes the opener returned by lodo_login()
    url    -- address of the voucher edit page

    Returns False when the page holds no 'voucher_2' form (no such
    voucher), True otherwise.  A voucher without posting rows prints
    nothing.  Raises ValueError for an account label or amount that
    cannot be interpreted.
    """
    response = opener.open(url)
    try:
        html = response.read()
    finally:
        response.close()
    # Remove duplicate, redundant and wrong charset info
    html = html.replace('<meta http-equiv="content-type" content="text/html; charset=macintosh">', "")
    root = lxml.html.fromstring(html)

    form = root.cssselect("table.lodo_data form[name='voucher_2']")
    if not form:
        return False
    header = form[0]
    jid = header.cssselect("input[name='voucher.JournalID']")[0].value
    date = header.cssselect("input[name='voucher.VoucherDate']")[0].value
    vtype = header.cssselect("input[name='VoucherType']")[0].value

    entries = []
    description = ""
    # The first two tr.voucher rows are skipped.
    # NOTE(review): presumably they are not posting rows -- confirm
    # against the page markup.
    for tr in root.cssselect("table.lodo_data tr.voucher")[2:]:
        label = tr.cssselect('select option[selected]')[0].text_content()
        account = _ledger_account(label)

        debit = _parse_amount(
            tr.cssselect("input[name='voucher.AmountIn']")[0].value)
        credit = _parse_amount(
            tr.cssselect("input[name='voucher.AmountOut']")[0].value)

        # .value is None when the input carries no value attribute
        description = tr.cssselect(
            "input.voucher[name='voucher.Description']")[0].value or ""
        invoiceid = tr.cssselect(
            "input.voucher[name='voucher.InvoiceID']")[0].value
        if invoiceid:
            description = "(faktura #%s) %s" % (invoiceid, description)
        entries.append((account, credit, debit))

    if not entries:
        # The voucher exists but has no postings, so there is no
        # transaction to print.
        return True

    # The transaction header carries the description of the last row.
    ledger_entry(date, vtype, jid, [description], entries)
    return True

def parse_voucher(opener, vtype, vid):
    """Look up one voucher by type and journal id and print it.

    Builds the journal search URL for voucher type `vtype` (one character)
    and journal id `vid` (integer) and passes it to parse_voucher_url(),
    whose result is returned unchanged.
    """
    query = (
        "t=journal.edit"
        "&voucher_VoucherType=%c"
        "&voucher_JournalID=%d"
        "&action_journalid_search=1"
    ) % (vtype, vid)
    return parse_voucher_url(opener, "https://login.lodo.no/index.php?" + query)

def lodo_login(username, password, dbname):
    """Post the LODO login form and return the opener used for it.

    The opener is built with a cookie processor, so cookies set by the
    login response are sent along with later requests made through it.

    username, password -- credentials posted in the login form
    dbname             -- value posted as DB_NAME_LOGIN

    NOTE(review): the response body is not inspected, so a rejected login
    is not detected here.
    """
    cookies = CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))
    formdata = {
        # NOTE(review): a fixed date is posted with every login -- confirm
        # the server does not expect the current date.
        "LoginFormDate" : "2012-12-14",
        "username" : username,
        "password": password,
        "DB_NAME_LOGIN" : dbname,
        }
    url = "https://login.lodo.no/lodo.php?t=lib.login&interf="
    response = opener.open(url, urllib.urlencode(formdata))
    try:
        # The body is not needed; read it fully, then release the connection.
        response.read()
    finally:
        response.close()
    return opener

def scrape_all_ids(opener):
    """Probe journal ids one by one for every voucher type and print hits.

    For each voucher type the ids are tried in increasing order through
    parse_voucher(), starting at a per-type first id.  The scan of a type
    ends once the configured number of lookups in a row have failed.
    Progress is reported on stderr and every lookup is followed by a
    3 second pause.
    """
    # (voucher type, first journal id to try, consecutive misses that
    # end the scan of that type)
    scan_plan = (
        ('A', 100000, 2),
        ('K', 0, 100),
        ('S', 1000, 2),
        ('B', 1000, 2),
        ('U', 1000, 2),
        )

    for vtype, startid, faillimit in scan_plan:
        misses = 0
        vid = startid
        while misses < faillimit:
            sys.stderr.write("checking %c %d\n" % (vtype, vid))
            found = parse_voucher(opener, vtype, vid)
            # a hit resets the count of consecutive misses
            misses = 0 if found else misses + 1
            vid += 1
            time.sleep(3)

# General ledger ("hovedbok") overview
def scrape_hovedbook(opener, year):
    """Print every voucher linked from the general ledger report of a year.

    Requests the "hovedbok" report for periods <year>-01 to <year>-13,
    collects the links to voucher edit pages found in table cells and
    passes each distinct link to parse_voucher_url(), pausing 3 seconds
    after each one.

    opener -- opener returned by lodo_login()
    year   -- the year as an integer
    """
    formdata = {
        "report.FromPeriod" : "%d-01" % year,
        # NOTE(review): period 13 is requested as the last period --
        # presumably a year-end period in LODO; confirm.
        "report.ToPeriod"   : "%d-13" % year,
        "report.Type"       : "hovedbok",
        "report.Sort"       : "VoucherDate",
        }
    url = "https://login.lodo.no/lodo.php?view_mvalines=&view_linedetails=&t=report.hovedbokvoucherprint"
    response = opener.open(url, urllib.urlencode(formdata))
    try:
        html = response.read()
    finally:
        response.close()
    root = lxml.html.fromstring(html)

    marker = "index.php?t=journal.edit&voucher_VoucherType="
    seen = set()
    for link in root.cssselect("td a"):
        href = link.get('href')
        # get() gives None for an anchor without href attribute
        if not href or marker not in href:
            continue
        newlink = urlparse.urljoin(url, href)
        # the same voucher can be linked more than once; fetch it only once
        if newlink in seen:
            continue
        seen.add(newlink)
        parse_voucher_url(opener, newlink)
        time.sleep(3)

def main():
    """Parse the command line, log in and print the vouchers of each year.

    Every positional argument is a year.  -h/--help prints the module
    docstring and exits with status 0.  An unknown option, a missing year
    or a year that is not an integer is reported on stderr and exits with
    status 2, before any login attempt is made.
    """
    # Diagnostics go to stderr because stdout carries the ledger output.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error as msg:
        sys.stderr.write("%s\n" % (msg,))
        sys.stderr.write("for help use --help\n")
        sys.exit(2)

    for opt, _value in opts:
        if opt in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)

    if not args:
        sys.stderr.write("at least one year is required\n")
        sys.stderr.write("for help use --help\n")
        sys.exit(2)

    # Convert all years first so a typo is caught before the login request.
    try:
        years = [int(arg) for arg in args]
    except ValueError:
        sys.stderr.write("years must be integers: %s\n" % " ".join(args))
        sys.stderr.write("for help use --help\n")
        sys.exit(2)

    opener = lodo_login(username, password, dbname)
    # scrape_all_ids(opener) is an alternative that probes journal ids
    # instead of following the links of the general ledger report.
    for year in years:
        scrape_hovedbook(opener, year)

if __name__ == "__main__":
    main()