from pytelegrafhttp.util import check_online

# Global default install directory for all files. Note that each specific file / directory may override where it is
# located, but by default they are all grouped together.
install_dir = '/etc/pytelegrafhttp'


###############
# ENVIRONMENT #
###############

# Directory for files used for daemon communication.
env_daemon_files_dir = install_dir + '/daemon'

env_cookies_file = install_dir + '/cookies.pkl'
env_state_file = install_dir + '/state.pkl'


########
# TIME #
########

# How often metrics are collected. Measured in seconds.
time_collection_interval = 120

# How often state is saved to env_state_file, measured in collection cycles.
time_save_frequency = 5


###########
# LOGGING #
###########

# Location of the log file directory (by default all logs are kept together; override individual log file paths to
# change this).
log_dir = install_dir + '/logs'

# Location of the log file that contains all output.
log_main_log_path = log_dir + '/main.log'

# Location of the log file that contains only errors.
log_error_log_path = log_dir + '/errors.log'

# Maximum size of a log file before it is rotated. The format is flexible, and accepts strings such as "24KB", "8g",
# or "5kbs".
log_file_max_size = "5 Mbs"

# Number of log files to keep. Once this number of rotated logs is reached, every rotation after that will cause the
# oldest one to be deleted.
log_file_keep_count = 4

# Whether the entire HTTP request should be in logged output. If True, headers, body, and query string are included.
# If False, only the host and the URI endpoint are included in logged output. WARNING: If set to True, sensitive data
# could be included in logged output, such as passwords or session IDs.
log_full_http_requests = False

# Whether the entire HTTP response should be in logged output. If True, headers and body are included. If False, only
# the HTTP status code is included in logged output.
log_full_http_responses = False

# Additional system logs to use. Adding one of these values requires that the associated python module is installed on
# the host system separately from this application.
#
# Supported values are: 'systemd'
log_os_logs = []

# Uncomment to enable journalctl logging:
# log_os_logs.append('systemd')


###########
# SCRAPER #
###########

scraper_host = 'e-hentai.org'
scraper_username = 'yourusername'
scraper_password = 'XXXXXXXXXX'
scraper_use_ssl = True

scraper_login_steps = [
    ('attempt', {'endpoint': '/hentaiathome.php'}),
    ('resp-extract', {'type': 'form-vars', 'inject': {'UserName': 'username', 'PassWord': 'password'}}),
    ('submit-form', {}),
    ('bounce-transfer', {'pattern': 'Or click here if you do not wish to wait'}),
    ('verify', {'pattern': 'H@H Miss% shows the percentage of requests'})
]

scraper_bot_kicked_pattern = 'banned for excessive pageloads which indicates'
scraper_logged_out_pattern = 'requires you to log on.'
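
# Illustration only: the sketch below is a guess at how the two patterns above
# are applied, not pytelegrafhttp's actual code, and the helper name is
# hypothetical. Both settings appear to be plain regular-expression strings
# searched against each fetched page: a kicked match should abort scraping and
# back off, while a logged-out match should trigger a re-run of
# scraper_login_steps.
import re


def _classify_response_sketch(body):
    """Return 'kicked', 'logged-out', or 'ok' for a fetched page body."""
    if re.search(scraper_bot_kicked_pattern, body):
        return 'kicked'
    if re.search(scraper_logged_out_pattern, body):
        return 'logged-out'
    return 'ok'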

scraper_telegraf_destinations = {
    'hath-client-net-stats': {
        'port': 10050,
        'global-tags': {}
    },
    'hath-net': {
        'port': 10051,
        'global-tags': {}
    }
}

scraper_endpoints = []

scraper_endpoints.append({
    'endpoint': '/hentaiathome.php',
    'verify-pattern': 'H@H Miss% shows the percentage of requests',
    'metrics': [
        {
            'dest': 'hath-net',  # destination db / telegraf identifier
            'name': 'hath_net',  # metric name
            'regex': [
                r'North and South America</td>\s*',
                r'<td[^>]*>[^ ]+ Gbit/s</td>\s*',
                r'<td[^>]*>=</td>\s*',
                r'<td[^>]*>([^ ]+) MB/s</td>\s*',
                r'<td[^>]*>([^ ]+) %</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)',
            ],
            'values': [
                {'name': 'load', 'conversion': int, 'type': 'CAPTURE-1'},
                {'name': 'miss-rate', 'conversion': float, 'type': 'CAPTURE-2'},
                {'name': 'coverage', 'conversion': float, 'type': 'CAPTURE-3'},
                {'name': 'hits-per-gb', 'conversion': float, 'type': 'CAPTURE-4'},
                {'name': 'quality', 'conversion': int, 'type': 'CAPTURE-5'}
            ],
            'tags': {'region': 'americas'}
        },
        {
            'dest': 'hath-net',
            'name': 'hath_net',
            'regex': [
                r'Europe and Africa</td>\s*',
                r'<td[^>]*>[^ ]+ Gbit/s</td>\s*',
                r'<td[^>]*>=</td>\s*',
                r'<td[^>]*>([^ ]+) MB/s</td>\s*',
                r'<td[^>]*>([^ ]+) %</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)',
            ],
            'values': [
                {'name': 'load', 'conversion': int, 'type': 'CAPTURE-1'},
                {'name': 'miss-rate', 'conversion': float, 'type': 'CAPTURE-2'},
                {'name': 'coverage', 'conversion': float, 'type': 'CAPTURE-3'},
                {'name': 'hits-per-gb', 'conversion': float, 'type': 'CAPTURE-4'},
                {'name': 'quality', 'conversion': int, 'type': 'CAPTURE-5'}
            ],
            'tags': {'region': 'europe-africa'}
        },
        {
            'dest': 'hath-net',
            'name': 'hath_net',
            'regex': [
                r'Asia and Oceania</td>\s*',
                r'<td[^>]*>[^ ]+ Gbit/s</td>\s*',
                r'<td[^>]*>=</td>\s*',
                r'<td[^>]*>([^ ]+) MB/s</td>\s*',
                r'<td[^>]*>([^ ]+) %</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)',
            ],
            'values': [
                {'name': 'load', 'conversion': int, 'type': 'CAPTURE-1'},
                {'name': 'miss-rate', 'conversion': float, 'type': 'CAPTURE-2'},
                {'name': 'coverage', 'conversion': float, 'type': 'CAPTURE-3'},
                {'name': 'hits-per-gb', 'conversion': float, 'type': 'CAPTURE-4'},
                {'name': 'quality', 'conversion': int, 'type': 'CAPTURE-5'}
            ],
            'tags': {'region': 'asia-oceania'}
        },
        {
            'dest': 'hath-net',
            'name': 'hath_net',
            'regex': [
                r'Global</td>\s*',
                r'<td[^>]*>[^ ]+ Gbit/s</td>\s*',
                r'<td[^>]*>=</td>\s*',
                r'<td[^>]*>([^ ]+) MB/s</td>\s*',
                r'<td[^>]*>([^ ]+) %</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td[^>]*>([^<]+)',
            ],
            'values': [
                {'name': 'load', 'conversion': int, 'type': 'CAPTURE-1'},
                {'name': 'miss-rate', 'conversion': float, 'type': 'CAPTURE-2'},
                {'name': 'coverage', 'conversion': float, 'type': 'CAPTURE-3'},
                {'name': 'hits-per-gb', 'conversion': float, 'type': 'CAPTURE-4'},
                {'name': 'quality', 'conversion': int, 'type': 'CAPTURE-5'}
            ],
            'tags': {'region': 'global'}
        },
        {
            'dest': 'hath-client-net-stats',
            'name': 'hath_health',
            'regex': [
                r'<tr>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td>([^<]+)</td>\s*',
                r'<td[^>]*>Online</td>\s*',
                r'<td>[^<]*</td>\s*',
                r'<td>([^<]*)</td>\s*',
                r'<td>([^<]+)</td>\s*',
                r'<td[^>]*>[^<]+</td>\s*',
                r'<td>[^<]*</td>\s*',
                r'<td>[^<]*</td>\s*',
                r'<td>[^<]*</td>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td>([^<]+)</td>\s*',
                r'<td>([^ ]+) / min</td>\s*',
                r'<td>([^ ]+) / day</td>\s*',
            ],
            'values': [
                {'name': 'online', 'conversion': lambda last: check_online(last, max_minutes=5), 'type': 'CAPTURE-3'},
                {'name': 'files', 'conversion': lambda s: int(s.replace(',', '')), 'type': 'CAPTURE-4'},
                {'name': 'trust', 'conversion': int, 'type': 'CAPTURE-5'},
                {'name': 'quality', 'conversion': int, 'type': 'CAPTURE-6'},
                {'name': 'hitrate', 'conversion': float, 'type': 'CAPTURE-7'},
                {'name': 'hathrate', 'conversion': float, 'type': 'CAPTURE-8'}
            ],
            'tags': {
                'host': 'CAPTURE-1',
                'client-id': 'CAPTURE-2',
            }
        },
        {
            'dest': 'hath-client-net-stats',
            'name': 'hath_health',
            'regex': [
                r'<tr>\s*',
                r'<td[^>]*>([^<]+)</td>\s*',
                r'<td>([^<]+)</td>\s*',
                r'<td[^>]*>Offline</td>\s*',
                r'<td>[^<]*</td>\s*',
                r'<td>[^<]*</td>\s*',
                r'<td>([^<]+)</td>\s*',
                r'<td[^>]*>Not available when offline</td>\s*'
            ],
            'values': [
                {'name': 'online', 'conversion': 0, 'type': 'VALUE'},
                {'name': 'files', 'conversion': lambda x: int(x.replace(',', '')), 'type': 'CAPTURE-3'}
            ],
            'tags': {
                'host': 'CAPTURE-1',
                'client-id': 'CAPTURE-2',
            }
        }
    ]
})
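

# Illustration only: the evaluator below is a guess at the semantics of the
# metric descriptions above, not pytelegrafhttp's actual code, and the function
# name is hypothetical. Each 'regex' list reads as fragments of one larger
# pattern split across items for legibility, so joining and searching it
# against the page yields a single match; 'CAPTURE-n' then names the n-th
# capture group of that match, 'conversion' is a callable applied to the
# captured text (or, for type 'VALUE', the literal value itself), and
# 'CAPTURE-n' tag values are resolved from the same match.
import re


def evaluate_metric_sketch(metric, page_html):
    match = re.search(''.join(metric['regex']), page_html)
    if match is None:
        return None  # metric not present on this page
    fields = {}
    for value in metric['values']:
        if value['type'] == 'VALUE':
            fields[value['name']] = value['conversion']  # a constant, not a callable
        else:
            fields[value['name']] = value['conversion'](match.group(int(value['type'].split('-')[1])))
    tags = dict(metric['tags'])
    for name, tag in tags.items():
        if isinstance(tag, str) and tag.startswith('CAPTURE-'):
            tags[name] = match.group(int(tag.split('-')[1]))
    return fields, tags

# For example, evaluate_metric_sketch(scraper_endpoints[0]['metrics'][0], page)
# would yield fields like {'load': ..., 'miss-rate': ...} with the tags
# {'region': 'americas'}.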