#!/usr/bin/env python
"""
    jldwebscraper - scrape a web page for anchor links

    @author: Jean-Lou Dupont
"""
__version__="0.2"
import os, sys, argparse
op=os.path

# Import the `jlddk` package, falling back to the parent directory of this
# script when the package cannot be imported from the regular module path.
try:
    import jlddk #@UnusedImport
except ImportError:
    ### must be in dev mode then: make the directory one level above this
    ### script importable and retry. Only ImportError triggers the fallback,
    ### so any other failure raised while importing the package is reported
    ### as-is instead of being masked by a second import attempt.
    this_dir=op.dirname(__file__)
    lib_path=op.abspath(op.join(this_dir, ".."))
    sys.path.insert(0, lib_path)
    import jlddk #@UnusedImport

########################################################################

# Description shown by argparse in the help output (includes the script version).
DESC = "Scrapes a web page - v%s" % (__version__,)

# Fallback values for the optional command-line arguments.
DEFAULTS = {
    "polling_interval": 30,
    "batch_size": 16,
}

def main():
    """
        Command-line entry point.

        Builds the argument parser, hands it to
        `jlddk.tools_sys.process_command_line` and passes the resulting
        mapping as keyword arguments to `jlddk.script_webscraper.run`.

        Arguments may also be read from a file by prefixing the file name
        with '@' on the command line (argparse `fromfile_prefix_chars`).

        Exit status:
          0 - interrupted by the user (KeyboardInterrupt)
          1 - any other exception; its message is logged as an error

        Returns None when `run` returns normally.
    """
    # Bound before the `try` so that both handlers below can always log,
    # even when one of the very first `jlddk` imports is what fails.
    import logging

    try:
        import jlddk.do_setup  #@UnusedImport
        import jlddk.do_checks #@UnusedImport

        parser=argparse.ArgumentParser(description=DESC, fromfile_prefix_chars='@', formatter_class=argparse.RawTextHelpFormatter)
        parser.add_argument('-su', dest='source_url',   type=str, help="Source URL", required=True)
        # NOTE(review): the help text below duplicates the one of '-su' and
        # looks like a copy/paste leftover - confirm the intended wording.
        parser.add_argument('-cp', dest='check_path',   type=str, help="Source URL")
        parser.add_argument('-bs', dest='batch_size',   type=int, help="Split links in batch", default=DEFAULTS["batch_size"])
        parser.add_argument('-j',  dest='format_json',  action="store_true", help="Output in JSON", default=False)
        parser.add_argument('-e',  dest='propagate_error',  action="store_true", help="Propagate errors (JSON)", default=False)
        parser.add_argument('-p',  dest="polling_interval", type=int, action="store", help="Polling interval (seconds)", default=DEFAULTS["polling_interval"])

        from jlddk.script_webscraper import run
        from jlddk.tools_sys import process_command_line
        args=process_command_line(parser)
        run(**args)

    except KeyboardInterrupt:
        logging.info("..Exiting")
        sys.exit(0)##no probs

    except Exception as e:
        # `as` form is accepted by Python 2.6+ as well as Python 3
        logging.error(str(e))
        sys.exit(1)
        
# Run the command-line entry point and hand its return value to the
# interpreter as the process exit status.
exit_status = main()
sys.exit(exit_status)
