#!/usr/bin/env python

# Copyright (c) 2012-2013, Geir Skjotskift, geir@underworld.no
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the copyright holder nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from scapy.all import sniff
from sys import stdout
from datetime import datetime
from optparse import OptionParser

# TCP ports watched when no explicit port list is supplied.
DEFAULT_PORTS = [80, 8080, 3128]

# Capture filter skeleton; {0} receives the ports joined with " or port ".
FILTER_TEMPLATE = 'tcp and (port {0})'

# Comment line written once at the top of a log, naming the columns.
LOG_HEADER = '#Client [timestamp] "Referer" Host "Method URI" Type "UserAgent"\n'

# Format of a single log entry; the fields are filled from the dictionary
# produced by parse_http_header().
LOG_LINE = ('{client} [{timestamp}] "{referer}" {host} '
            '"{method} {uri}" {type} "{useragent}"\n')

# Request methods recognised at the very start of a TCP payload.
METHODS = ['GET', 'POST', 'CONNECT', 'PUT']

def get_filter(ports=DEFAULT_PORTS):
    """Build the capture filter string handed to the sniffer.

    Arguments:

    ports - Iterable of port numbers to listen on. Each one is converted
            with str() and the results are joined with " or port "
            before being inserted into FILTER_TEMPLATE."""

    port_clause = ' or port '.join(str(port) for port in ports)
    return FILTER_TEMPLATE.format(port_clause)


def generate_logger_function(output=stdout, header=True, forceflush=True):
    """Create the per-packet callback that writes log lines.

    The returned function takes a single packet, runs it through
    parse_http_header() and, when that yields data, writes one LOG_LINE
    formatted entry to output.

    Arguments:

    output     - File or IO object to write the log to (must provide the
                 .write() method). Default is stdout.
    header     - Boolean. Whether or not LOG_HEADER is written to output
                 when the logger is created. Default is True.
    forceflush - Boolean. Whether or not output.flush() is called after
                 every log entry. Default is True.

    Raises InvalidOutput when output has no .write attribute."""

    # Refuse unusable outputs before anything is written.
    if not hasattr(output, 'write'):
        raise InvalidOutput()

    def logger(packet):
        fields = parse_http_header(packet)
        if not fields:
            # Not an HTTP request packet: nothing to log.
            return
        output.write(LOG_LINE.format(**fields))
        if forceflush:
            output.flush()

    if header:
        output.write(LOG_HEADER)

    return logger


def contains_header(packet):
    """Tell whether a packet's TCP payload starts with a known method.

    Arguments:

    packet - a scapy packet

    Returns True when the packet has a TCP layer whose payload, converted
    with str(), begins with one of the entries in METHODS; False
    otherwise."""

    if not packet.haslayer("TCP"):
        return False

    tcp_payload = str(packet["TCP"].payload)
    # str.startswith accepts a tuple and checks every candidate prefix.
    return tcp_payload.startswith(tuple(METHODS))


def parse_http_header(packet):
    """Parse out log fields from a packet and return a dictionary for
    use with the logger.

    Arguments:

    packet - a scapy packet

    Returns False when contains_header() rejects the packet. Otherwise
    returns a dictionary with the keys 'client', 'timestamp', 'referer',
    'host', 'method', 'uri', 'type', 'useragent' and 'size'. 'host'
    falls back to the destination address and 'referer'/'useragent' to
    '-' when the corresponding header is absent; 'size' is always '-'."""

    if not contains_header(packet):
        return False

    lines = str(packet["TCP"].payload).split('\n')

    # NOTE(review): an "IP" layer is assumed to be present; confirm what
    # should happen for traffic that matches the filter without one.
    packet_data = {'method': 'UNKNOWN',
                   'uri': 'UNKNOWN',
                   'type': 'UNKNOWN',
                   'host': packet["IP"].dst,
                   'referer': '-',
                   'useragent': '-'}

    # Request line: "<method> <uri> <type>"; missing parts stay 'UNKNOWN'.
    request_line = lines[0]
    for method in METHODS:
        if request_line.startswith(method):
            packet_data['method'] = method
            rest = request_line[len(method):].split()
            if len(rest) > 0:
                packet_data['uri'] = rest[0]
            if len(rest) > 1:
                packet_data['type'] = rest[1]
            break

    # Header name (lower case) -> key in packet_data.
    wanted = {'host': 'host',
              'referer': 'referer',
              'user-agent': 'useragent'}

    for line in lines[1:]:
        if not line.strip():
            # An empty line ends the header section; whatever follows is
            # the request body and must not be mistaken for headers.
            break
        # Split on the first colon only, so values that contain colons
        # (URLs, host:port) stay intact, and compare the complete header
        # name instead of a prefix. Stripping the value copes with any
        # amount of whitespace after the colon, including none.
        name, colon, value = line.partition(':')
        if not colon:
            continue
        key = wanted.get(name.strip().lower())
        if key is not None:
            packet_data[key] = value.strip()

    packet_data['size'] = '-'
    packet_data['client'] = packet["IP"].src
    # float() is a no-op for float timestamps; presumably some scapy
    # versions hand out a Decimal-like value here -- TODO confirm.
    packet_data['timestamp'] = datetime.fromtimestamp(
        float(packet.time)).isoformat()

    return packet_data


def main():
    """Command line entry point.

    Parses the command line options, builds the packet filter and the
    logger function, then hands both to scapy's sniff(), passing the
    -i value as iface and the -r value as offline. Log lines go to
    stdout, or to the file named with -o."""

    parser = OptionParser()
    parser.add_option("-o", "--output", dest="output",
                      help="write output to FILE", metavar="FILE",
                      default=None)
    parser.add_option("-r", "--read", dest="input",
                      default=None, help="Read from pcap FILE")
    parser.add_option("-i", "--interface", dest="interface",
                      default=None, help="Listen interface")
    parser.add_option("-f", "--filter", dest="filter", default=None,
                      metavar="LIST",
                      help="LIST of ports to listen on. Default: 80,3128,8080")
    # store_false with default=True means passing -F turns flushing OFF;
    # the help text has to say so.
    parser.add_option("-F", "--forceflush", dest="forceflush",
                      default=True, action="store_false",
                      help="Do not force an output flush after each log "
                           "entry (flushing is enabled by default).")

    options, _ = parser.parse_args()

    # Validate the port list before the output file is opened, so a typo
    # on the command line cannot truncate an existing log.
    if options.filter:
        try:
            ports = [int(port) for port in options.filter.split(',')]
        except ValueError:
            # parser.error() prints the usage plus this message and exits.
            parser.error("invalid port list: %r" % options.filter)
        packet_filter = get_filter(ports)
    else:
        packet_filter = get_filter()

    output_file = None
    try:
        if options.output:
            output_file = open(options.output, 'wb')
            logger = generate_logger_function(
                output_file, forceflush=options.forceflush)
        else:
            logger = generate_logger_function(forceflush=options.forceflush)

        sniff(iface=options.interface, filter=packet_filter, store=0,
              offline=options.input, prn=logger)
    finally:
        # Close the log file however sniffing ends.
        if output_file is not None:
            output_file.close()

try:
    _InvalidOutputBase = StandardError
except NameError:
    # StandardError does not exist on Python 3; Exception is its parent
    # on Python 2, so handlers catching Exception behave the same.
    _InvalidOutputBase = Exception


class InvalidOutput(_InvalidOutputBase):
    """Raised when a log output object does not provide .write()."""

    def __init__(self):
        # super() needs the class and the instance. Handing it only the
        # message string raises TypeError, so the exception could never
        # be constructed and its message was never set.
        super(InvalidOutput, self).__init__(
            "Invalid output object, must provide .write() method")

if __name__ == '__main__':
    main()
