#!/usr/bin/env python
# -*- coding: utf-8 -*- vim: ts=4 sts=4 sw=4 si et tw=79
"""\
HTML letters - erzeugt Tabellen von HTML-Entitys fuer Latin-1-Buchstaben
"""

__author__ = "Tobias Herp <tobias.herp@gmx.net>"

# Version tuple: major, minor, revision tag.
# The revision tag is cut out of the '$Rev: ... $' keyword string
# (presumably expanded by Subversion, cf. svn-kwd-rev.cmd); the slice drops
# the leading '$Rev: ' and the trailing ' $'.
VERSION = (
    0,
    1,  # initial version
    'rev-%s' % ('$Rev: 903 $'[6:-2],),
)

# Dotted string form of VERSION
__version__ = '.'.join(str(part) for part in VERSION)
try:
    from enhopa import OptionParser, OptionGroup
except ImportError:
    from optparse import OptionParser, OptionGroup
# Command-line interface.
# NOTE: the parser is built and parse_args() is called right here at module
# level, so this also happens when the file is merely imported.
p = OptionParser(usage='%prog [-abc]',
                 description='bla...',
                 version='.'.join(map(str, VERSION)))

g = OptionGroup(p, "Specific options")
g.add_option('--repr', '-r',
             help='print representation (repr) '
                  'instead of plain string conversions',
             action='store_true')
g.add_option('-v', '--verbose',
             help='be verbose (-vv: even more verbose)',
             action='count')
p.add_option_group(g)

try:
    # Presumably an extension of the optional enhopa parser; parsers
    # without this method are used as they are.
    p.set_collecting_group()
except AttributeError:
    pass

# option: the parsed option values; args: the remaining positional arguments
option, args = p.parse_args()

# URL template pointing at a page anchor of an RFC; to be filled from a
# mapping that provides the numbers 'rfc' and 'page'.
RFC_MASK = 'http://tools.ietf.org/html/rfc%(rfc)d#page-%(page)d'

# HTML version labels, used as keys of the tables in this module
_20 = '2.0'
_5 = '5'

# HTML version -> number of the RFC
HTML2RFC = {
    _20: 1866,
    # nota bene: not an IETF standard anymore (moved to W3C)
    '3.0': 2854,
}

# HTML version -> URL of the W3C document listing the entities
HTML2W3C = {
    '3.0': 'http://www.w3.org/MarkUp/html3/latin1.html',
    '3.2': 'http://www.w3.org/TR/REC-html32.html#latin1',
    '4.01': 'http://www.w3.org/TR/html401/sgml/entities.html#h-24.2.1',
    _5: ('http://www.w3.org/TR/html5/named-character-references.html'
         '#named-character-references'),
}
# Base letters, each in upper and lower case
bases = [
    'A', 'a',
    'C', 'c',
    'E', 'e',
    'I', 'i',
    'N', 'n',
    'O', 'o',
    'U', 'u',
    'Y', 'y',
]

# Letter combinations of the ligature entities
ligatures = [
    'AE', 'ae', 'sz',
    # HTML 4:
    'OE', 'oe',
    # HTML 5:
    'IJ',
]

# Variant suffixes which are appended to a base letter
variants = [
    'acute',
    'cedil',
    'circ',
    'grave',
    'ring',
    'slash',
    'tilde',
    'uml',
]
# Page on which each entity is declared, keyed by HTML version and then by
# an entity key:
#   (base letter, variant)  for accented letters, e.g. ('A', 'acute')
#   (name, None)            for letters without a variant suffix, e.g. ETH
#   (letters, 'lig')        for ligatures, e.g. ('AE', 'lig')
# The page numbers for HTML 2.0 follow the RFC excerpt quoted further down
# in this file (presumably the RFC named in HTML2RFC).
ENTITY_on_PAGE = {
    _20: {
        ('A', 'acute'): 67,    # &#193; capital A, acute accent
        ('A', 'circ'): 67,     # &#194; capital A, circumflex accent
        ('A', 'grave'): 67,    # &#192; capital A, grave accent
        ('A', 'ring'): 67,     # &#197; capital A, ring
        ('A', 'tilde'): 67,    # &#195; capital A, tilde
        ('A', 'uml'): 67,      # &#196; capital A, dieresis or umlaut mark
        ('C', 'cedil'): 67,    # &#199; capital C, cedilla
        ('E', 'acute'): 67,    # &#201; capital E, acute accent
        ('E', 'circ'): 67,     # &#202; capital E, circumflex accent
        ('E', 'grave'): 68,    # &#200; capital E, grave accent
        ('E', 'uml'): 68,      # &#203; capital E, dieresis or umlaut mark
        ('I', 'acute'): 68,    # &#205; capital I, acute accent
        ('I', 'circ'): 68,     # &#206; capital I, circumflex accent
        ('I', 'grave'): 68,    # &#204; capital I, grave accent
        ('I', 'uml'): 68,      # &#207; capital I, dieresis or umlaut mark
        ('N', 'tilde'): 68,    # &#209; capital N, tilde
        ('O', 'acute'): 68,    # &#211; capital O, acute accent
        ('O', 'circ'): 68,     # &#212; capital O, circumflex accent
        ('O', 'grave'): 68,    # &#210; capital O, grave accent
        ('O', 'slash'): 68,    # &#216; capital O, slash
        ('O', 'tilde'): 68,    # &#213; capital O, tilde
        ('O', 'uml'): 68,      # &#214; capital O, dieresis or umlaut mark
        # ETH is declared between Ccedil and Eacute, i.e. still on page 67
        ('ETH', None): 67,     # &#208; capital Eth, Icelandic
        ('THORN', None): 68,   # &#222; capital THORN, Icelandic
        ('eth', None): 68,     # &#240; small eth, Icelandic
        ('thorn', None): 68,   # &#254; small thorn, Icelandic
        ('U', 'acute'): 68,    # &#218; capital U, acute accent
        ('U', 'circ'): 68,     # &#219; capital U, circumflex accent
        ('U', 'grave'): 68,    # &#217; capital U, grave accent
        ('U', 'uml'): 68,      # &#220; capital U, dieresis or umlaut mark
        ('Y', 'acute'): 68,    # &#221; capital Y, acute accent
        ('a', 'acute'): 68,    # &#225; small a, acute accent
        ('a', 'circ'): 68,     # &#226; small a, circumflex accent
        # NOTE(review): same entity as ('ae', 'lig') below, but under a
        # different key scheme -- confirm which key the consumers expect
        ('a', 'elig'): 68,     # &#230; small ae diphthong (ligature)
        ('a', 'grave'): 68,    # &#224; small a, grave accent
        ('a', 'ring'): 68,     # &#229; small a, ring
        ('a', 'tilde'): 68,    # &#227; small a, tilde
        ('a', 'uml'): 68,      # &#228; small a, dieresis or umlaut mark
        ('c', 'cedil'): 68,    # &#231; small c, cedilla
        ('e', 'acute'): 68,    # &#233; small e, acute accent
        ('e', 'circ'): 68,     # &#234; small e, circumflex accent
        ('e', 'grave'): 68,    # &#232; small e, grave accent
        ('e', 'uml'): 68,      # &#235; small e, dieresis or umlaut mark
        ('i', 'acute'): 68,    # &#237; small i, acute accent
        ('i', 'circ'): 68,     # &#238; small i, circumflex accent
        ('i', 'grave'): 68,    # &#236; small i, grave accent
        ('i', 'uml'): 68,      # &#239; small i, dieresis or umlaut mark
        ('n', 'tilde'): 68,    # &#241; small n, tilde
        ('o', 'acute'): 68,    # &#243; small o, acute accent
        ('o', 'circ'): 68,     # &#244; small o, circumflex accent
        ('o', 'grave'): 68,    # &#242; small o, grave accent
        ('o', 'slash'): 68,    # &#248; small o, slash
        ('o', 'tilde'): 68,    # &#245; small o, tilde
        ('o', 'uml'): 68,      # &#246; small o, dieresis or umlaut mark
        ('u', 'acute'): 68,    # &#250; small u, acute accent
        ('u', 'circ'): 68,     # &#251; small u, circumflex accent
        ('u', 'grave'): 68,    # &#249; small u, grave accent
        ('u', 'uml'): 69,      # &#252; small u, dieresis or umlaut mark
        ('y', 'acute'): 69,    # &#253; small y, acute accent
        ('y', 'uml'): 69,      # &#255; small y, dieresis or umlaut mark

        ('AE', 'lig'): 67,     # &#198; capital AE diphthong (ligature)
        ('ae', 'lig'): 68,     # &#230; small ae diphthong (ligature)
        ('sz', 'lig'): 68,     # &#223; small sharp s, German (sz ligature)
    },
}

# Human-readable description for each variant suffix of an entity name
VARIANT_DESCRIPTIONS = {
    'acute': 'acute accent',
    'cedil': 'cedilla',
    'circ': 'circumflex accent',
    'grave': 'grave accent',
    'ring': 'ring',
    'slash': 'slash',
    'tilde': 'tilde',
    'uml': 'dieresis or umlaut mark',

    # HTML 5:
    'breve': '???',  # Ubreve
    'caron': 'Hatschek',
    'cy': 'kyrillisch',
    'dblac': 'Doppelakut',  # Odblac, Udblac
    'dot': 'mit Punkt',
    'midot': 'mit mittigem Punkt',
    'macr': 'Makron',
    'ogon': 'Ogonek',
    'fr': 'Fraktur',
    'opf': 'doppelt',
    'scr': 'Schreibschrift',
    'strok': 'durchgestrichen',
}

# Entities with the suffix 'cy' (names listed here without that suffix):
CYRILLIC = [
    'CH', 'DJ', 'DS', 'DZ', 'GJ', 'HARD', 'Iuk', 'Jser', 'Juk',
    'KH', 'KJ', 'LJ', 'NJ',
    'SHCH', 'SH', 'SOFT', 'TSH', 'TS', 'Ubr', 'YI', 'YU', 'YA', 'ZH',
    'D', 'E', 'G', 'J', 'M', 'S', 'Y', 'Z',
    # ...
]

# Entity names of currency symbols
CURRENCIES = [
    'curren',
    'dollar',
    'euro',
    'pound',
    'yen',
    'cent',
]

# Variants of the initial letter (at least visually)
# which have no counterparts for other letters.
#
# Maps an entity name to a tuple
#   (letter(s), description, HTML version, character(s))
# where the first item is apparently the Latin letter the character
# resembles (a tuple if there is more than one) and the last item holds the
# code point escape(s) of the character.
# NOTE(review): in a Python 2 byte string '\uXXXX' is not processed as an
# escape sequence -- confirm whether unicode literals were intended here.
# NOTE(review): some keys for cyrillic letters ('R', 'S', 'V', 'YA', 'YI',
# 'YU') lack the 'cy' suffix which others ('Ucy', 'Jcy', ...) carry --
# confirm the intended key scheme.
NON_SYSTEMATIC = {
    'Bernoullis': ('B', '?', _5, '\u212C'),
    'Cayleys': ('C', '?', _5, '\u212D'),
    'CapitalDifferentialD': ('D', '?', _5, '\u2145'),
    'Chi': ('X', 'capital greek letter Chi', _5, '\u03A7'),
    'Cross': ('X', 'multiplication symbol', _5, '\u2A2F'),
    'COPY': ('C', 'copyright', _5, '\u00a9'),
    'copy': ('C', 'copyright', _5, '\u00a9'),
    'DD': ('D', '?', _5, '\u2145'),
    'Epsilon': ('E', 'capital greek letter Epsilon', _5, '\u0395'),
    'Eta': ('H', 'capital greek letter Eta', _5, '\u0397'),
    'Element': ('E', 'is element of', _5, '\u2208'),
    'Exists': ('E', '?', _5, '\u2203'),
    'NotExists': ('E', '?', _5, '\u2204'),
    'ExponentialE': ('E', '?', _5, '\u2147'),
    'Fouriertrf': ('F', '?', _5, '\u2131'),
    'HilbertSpace': ('H', '?', _5, '\u210B'),
    'IEcy': ('E', '?', _5, '\u0415'),
    'IOcy': ('E', '?', _5, '\u0401'),
    'Im': ('I', 'imaginary part', _5, '\u2111'),
    'ImaginaryI': ('I', 'i after imaginary part', _5, '\u2148'),
    'Int': ('S', 'doubled integral S', _5, '\u222C'),
    'Integral': ('S', 'integral S', _5, '\u222B'),
    'Iota': ('I', 'capital greek letter Iota', _5, '\u0399'),
    'Iukcy': ('I', 'capital cyrillic letter Iuk', _5, '\u0406'),
    'Jcy': ('N', 'capital cyrillic letter J', _5, '\u0419'),
    'Jsercy': ('J', 'capital cyrillic letter Jser', _5, '\u0408'),
    'Jukcy': ('E', 'capital cyrillic letter Juk', _5, '\u0404'),
    'Laplacetrf': ('L', 'Laplace transformation', _5, '\u2112'),
    'Mellintrf': ('M', '?', _5, '\u2133'),
    'Nu': ('N', '?', _5, '\u039D'),
    'Omega': ('O', 'capital greek letter Omega', _5, '\u03a9'),
    'Omicron': ('O', 'capital greek letter Omicron', _5, '\u039f'),
    'Or': ('V', 'logical symbol or', _5, '\u2a54'),
    'PartialD': ('D', 'differential d', _5, '\u2202'),
    'Poincareplane': ('H', '?', _5, '\u210c'),
    'REG': ('R', 'registered', _5, '\u00ae'),
    'R': ('P', 'capital cyrillic letter R', _5, '\u0420'),
    'Re': ('R', 'real part', _5, '\u211c'),
    'Rho': ('P', 'capital greek letter Rho', _5, '\u03a1'),
    'S': ('C', 'capital cyrillic letter S', _5, '\u0421'),
    'Tau': ('T', 'capital greek letter Tau', _5, '\u03a4'),
    'Ucy': ('Y', 'capital cyrillic letter U', _5, '\u0423'),
    'Union': ('U', 'union', _5, '\u22c3'),
    # 'UnionPlus':
    #     ('U', 'unionplus', _5, '\u228e'),
    'Upsi': ('Y', 'capital greek letter Upsi', _5, '\u03d2'),
    'Upsilon': ('Y', 'capital greek letter Ypsilon', _5, '\u03a5'),
    'V': ('B', 'capital cyrillic letter V', _5, '\u0412'),
    'Vee': ('V', '?', _5, '\u22c1'),
    'YA': ('R', 'capital cyrillic letter YA', _5, '\u042f'),
    'YI': ('I', 'capital cyrillic letter YI', _5, '\u0407'),
    'YU': (('H', 'O'), 'capital cyrillic letter YU', _5, '\u042e'),
    # The name is spelled with A-ring and o-umlaut (the character itself is
    # U+00C5); the description used to reference &Acirc; and &oslash;.
    'angst': ('A', '&Aring;ngstr&ouml;m', _5, '\u00c5'),
    'backcong': ('S', 'backcong', _5, '\u224c'),
    'bcong': ('S', 'backcong', _5, '\u224c'),
    'backepsilon': ('E', 'backepsilon', _5, '\u03f6'),
    'bepsi': ('E', 'backepsilon', _5, '\u03f6'),
    'backsim': ('S', 'backsim', _5, '\u223d'),
    'bsim': ('S', 'backsim', _5, '\u223d'),
    'caret': ('y', 'caret', _5, '\u2041'),
    'circledR': ('R', 'circled R', _5, '\u00ae'),
    'circledS': ('S', 'circled S', _5, '\u24c8'),
    'commat': ('a', 'at symbol', _5, '\u0040'),
    'complement': ('C', 'complement', _5, '\u2201'),
    'comp': ('C', 'complement', _5, '\u2201'),
    'complexes': ('C', 'set of complex numbers', _5, '\u2102'),
    'copysr': ('P', 'copysr', _5, '\u2117'),
    'cross': ('X', 'cross', _5, '\u2717'),
    'cup': ('U', 'cup (Vereinigungsmenge)', _5, '\u222a'),
    # two code points: the cup symbol followed by a variation selector
    'cups': ('U', 'cup (Vereinigungsmenge)', _5,
             ('\u222a', '\ufe00')),
}


"""
S. 67:
<!ENTITY AElig  CDATA "&#198;" -- capital AE diphthong (ligature) -->    
<!ENTITY Aacute CDATA "&#193;" -- capital A, acute accent -->
<!ENTITY Acirc  CDATA "&#194;" -- capital A, circumflex accent -->
<!ENTITY Agrave CDATA "&#192;" -- capital A, grave accent -->
<!ENTITY Aring  CDATA "&#197;" -- capital A, ring -->
<!ENTITY Atilde CDATA "&#195;" -- capital A, tilde -->
<!ENTITY Auml   CDATA "&#196;" -- capital A, dieresis or umlaut mark -->
<!ENTITY Ccedil CDATA "&#199;" -- capital C, cedilla -->
<!ENTITY ETH    CDATA "&#208;" -- capital Eth, Icelandic -->
<!ENTITY Eacute CDATA "&#201;" -- capital E, acute accent -->
<!ENTITY Ecirc  CDATA "&#202;" -- capital E, circumflex accent -->
S. 68:
<!ENTITY Egrave CDATA "&#200;" -- capital E, grave accent -->
<!ENTITY Euml   CDATA "&#203;" -- capital E, dieresis or umlaut mark -->
<!ENTITY Iacute CDATA "&#205;" -- capital I, acute accent -->
<!ENTITY Icirc  CDATA "&#206;" -- capital I, circumflex accent -->
<!ENTITY Igrave CDATA "&#204;" -- capital I, grave accent -->
<!ENTITY Iuml   CDATA "&#207;" -- capital I, dieresis or umlaut mark -->
<!ENTITY Ntilde CDATA "&#209;" -- capital N, tilde -->
<!ENTITY Oacute CDATA "&#211;" -- capital O, acute accent -->
<!ENTITY Ocirc  CDATA "&#212;" -- capital O, circumflex accent -->
<!ENTITY Ograve CDATA "&#210;" -- capital O, grave accent -->
<!ENTITY Oslash CDATA "&#216;" -- capital O, slash -->
<!ENTITY Otilde CDATA "&#213;" -- capital O, tilde -->
<!ENTITY Ouml   CDATA "&#214;" -- capital O, dieresis or umlaut mark -->
<!ENTITY THORN  CDATA "&#222;" -- capital THORN, Icelandic -->
<!ENTITY Uacute CDATA "&#218;" -- capital U, acute accent -->
<!ENTITY Ucirc  CDATA "&#219;" -- capital U, circumflex accent -->
<!ENTITY Ugrave CDATA "&#217;" -- capital U, grave accent -->
<!ENTITY Uuml   CDATA "&#220;" -- capital U, dieresis or umlaut mark -->
<!ENTITY Yacute CDATA "&#221;" -- capital Y, acute accent -->
<!ENTITY aacute CDATA "&#225;" -- small a, acute accent -->
<!ENTITY acirc  CDATA "&#226;" -- small a, circumflex accent -->
<!ENTITY aelig  CDATA "&#230;" -- small ae diphthong (ligature) -->
<!ENTITY agrave CDATA "&#224;" -- small a, grave accent -->
<!ENTITY aring  CDATA "&#229;" -- small a, ring -->
<!ENTITY atilde CDATA "&#227;" -- small a, tilde -->
<!ENTITY auml   CDATA "&#228;" -- small a, dieresis or umlaut mark -->
<!ENTITY ccedil CDATA "&#231;" -- small c, cedilla -->
<!ENTITY eacute CDATA "&#233;" -- small e, acute accent -->
<!ENTITY ecirc  CDATA "&#234;" -- small e, circumflex accent -->
<!ENTITY egrave CDATA "&#232;" -- small e, grave accent -->
<!ENTITY eth    CDATA "&#240;" -- small eth, Icelandic -->
<!ENTITY euml   CDATA "&#235;" -- small e, dieresis or umlaut mark -->
<!ENTITY iacute CDATA "&#237;" -- small i, acute accent -->
<!ENTITY icirc  CDATA "&#238;" -- small i, circumflex accent -->
<!ENTITY igrave CDATA "&#236;" -- small i, grave accent -->
<!ENTITY iuml   CDATA "&#239;" -- small i, dieresis or umlaut mark -->
<!ENTITY ntilde CDATA "&#241;" -- small n, tilde -->
<!ENTITY oacute CDATA "&#243;" -- small o, acute accent -->
<!ENTITY ocirc  CDATA "&#244;" -- small o, circumflex accent -->
<!ENTITY ograve CDATA "&#242;" -- small o, grave accent -->
<!ENTITY oslash CDATA "&#248;" -- small o, slash -->
<!ENTITY otilde CDATA "&#245;" -- small o, tilde -->
<!ENTITY ouml   CDATA "&#246;" -- small o, dieresis or umlaut mark -->
<!ENTITY szlig  CDATA "&#223;" -- small sharp s, German (sz ligature)->
<!ENTITY thorn  CDATA "&#254;" -- small thorn, Icelandic -->
<!ENTITY uacute CDATA "&#250;" -- small u, acute accent -->
<!ENTITY ucirc  CDATA "&#251;" -- small u, circumflex accent -->
<!ENTITY ugrave CDATA "&#249;" -- small u, grave accent -->
S. 69:
<!ENTITY uuml   CDATA "&#252;" -- small u, dieresis or umlaut mark -->
<!ENTITY yacute CDATA "&#253;" -- small y, acute accent -->
<!ENTITY yuml   CDATA "&#255;" -- small y, dieresis or umlaut mark -->
"""


# for modules: names exported by "from <module> import *" (none so far)
__all__ = []

if __name__ == '__main__':
    # Parenthesised single-argument form: prints the same text whether it is
    # parsed as a Python 2 print statement or as a Python 3 function call
    # (the bare statement form is a SyntaxError under Python 3).
    print('Aufruf als Programm')
    # for modules:
    # modinfo is imported only when run as a program
    import modinfo
    modinfo.main(version=__version__)

