Group and add equal values from a python list

1

I have these 2 lists:

# Private (client) IPs whose traffic must be totalled.
list_ip = ["192.168.2.9", "192.168.2.8", "192.168.2.7", "192.168.2.6"]
# Each record is [src_ip, dst_ip, bytes, packets, ...]; every field is a string.
list_traffic = [
    [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.9', u'10', u'1', u'*', u'*'],
    [u'192.168.2.7', u'74.125.139.124', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    [u'192.168.2.9', u'74.125.139.125', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
]

The objective is to sum the value at position 2 (bytes) of each of the lists within list_traffic. Only the IPs that are in list_ip (private IPs) must be counted.

As a reference, I share the value of each position in the list:

 #list_example: [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*'] 
 #position 0=src_ip  #Bytes downloaded
 #position 1=dst_ip  #Bytes Uploaded
 #position 2=bytes   #Value to be summed
 #position 3=packets

Example:

The first value of list_ip is "192.168.2.9", so you must sum the bytes (position 2) of every record that contains the IP "192.168.2.9". In this case those are the following entries of list_traffic:

    [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.9', u'10', u'1', u'*', u'*'],
    [u'192.168.2.9', u'74.125.139.125', u'10', u'1', u'*', u'*'],

src_ip "192.168.2.9" 10 + 10 = 20 (bytes downloaded)

dst_ip "192.168.2.9" 10 = 10 (bytes uploaded)

The expected result would be this:

# Result
 new_list_traffic = { 
     "192.168.2.9": [20, 10],
     "192.168.2.7": [10, 30],
 }
#192.168.2.9 = 20/10  download/upload bytes
#192.168.2.7 = 10/30 download/upload bytes

How could you add the values of the position bytes efficiently?

I have already implemented it, but it takes 2 seconds to go through the entire list. Here is my code:

def trafico_clientes2(request):
    """Render per-client byte totals for the private IPs in ``list_ip``.

    Each ``list_traffic`` record is ``[src_ip, dst_ip, bytes, packets, ...]``.
    For every private IP that appears in a record, a ``[download, upload]``
    pair of byte totals is accumulated: the record counts as download when
    the private IP is the source, and as upload when it is the destination.

    Args:
        request: request object, passed straight through to ``render``.

    Returns:
        Whatever ``render`` returns for ``trafico.html`` with the raw records
        (``datos``), the totals (``lista_trafico``) and the elapsed time
        tuple (``total_tiempo``).
    """
    # time.clock() was removed in Python 3.8; use perf_counter when available
    # and fall back to clock() on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock
    start_time = timer()
    # list_example: [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*']
    # 0=src_ip, 1=dst_ip, 2=bytes, 3=packets

    list_ip = ["192.168.2.9", "192.168.2.8", "192.168.2.7", "192.168.2.6"]
    list_traffic = [
        [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.9', u'10', u'1', u'*', u'*'],
        [u'192.168.2.7', u'74.125.139.124', u'10', u'1', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
        [u'192.168.2.9', u'74.125.139.125', u'10', u'1', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    ]
    # A set makes every membership test O(1) instead of a scan of list_ip.
    private_ips = set(list_ip)
    new_list_traffic = {}

    for traffic_ip in list_traffic:
        src_ip = traffic_ip[0]
        dst_ip = traffic_ip[1]
        n_bytes = int(traffic_ip[2])
        # A record is attributed to one side only: the source is checked
        # first, and the destination is considered only if the source is not
        # a private IP.
        if src_ip in private_ips:
            # bytes download
            new_list_traffic.setdefault(src_ip, [0, 0])[0] += n_bytes
        elif dst_ip in private_ips:
            # bytes upload
            new_list_traffic.setdefault(dst_ip, [0, 0])[1] += n_bytes
    # Result
    # new_list_traffic = {
    #     "192.168.2.9": [20, 10],
    #     "192.168.2.7": [10, 30],
    # }
    # 192.168.2.9 = 20/10  download/upload bytes
    # 192.168.2.7 = 10/30 download/upload bytes

    total_tiempo = timer() - start_time, "seconds"
    return render(request, 'trafico.html', {"datos": list_traffic, "lista_trafico": new_list_traffic, "total_tiempo": total_tiempo})

I hope you can help me optimize the running time.

Greetings.

    
asked by oscar 16.05.2017 в 15:52
source

2 answers

4

I can think of a simple way using collections.defaultdict that saves having to go through the list several times:

from collections import defaultdict

# Private (client) IPs whose traffic is totalled.
list_ip = ["192.168.2.9", "192.168.2.8", "192.168.2.7", "192.168.2.6"]
# Each record is [src_ip, dst_ip, bytes, packets, ...]; every field is a string.
list_traffic = [
    [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.9', u'10', u'1', u'*', u'*'],
    [u'192.168.2.7', u'74.125.139.124', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    [u'192.168.2.9', u'74.125.139.125', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
]

# Build the set once so each membership test in the loop is O(1) rather than
# a linear scan of list_ip.
private_ips = set(list_ip)

# Result dictionary; an IP seen for the first time starts at [0, 0]
# (index 0 accumulates bytes where the IP is the source, index 1 where it is
# the destination).
new_list_traffic = defaultdict(lambda: [0, 0])

# Single pass over the traffic records.
for record in list_traffic:
    src_ip, dst_ip, raw_bytes = record[:3]
    if src_ip in private_ips:
        new_list_traffic[src_ip][0] += int(raw_bytes)
    if dst_ip in private_ips:
        new_list_traffic[dst_ip][1] += int(raw_bytes)

# print() with a single argument works on both Python 2 and Python 3; list()
# keeps the output a plain list of (ip, totals) pairs on either version.
print(list(new_list_traffic.items()))

It could be even more elegant if we create a class to represent the traffic:

class Bytes:
    """Pair of upload/download byte counters, both starting at zero."""

    def __init__(self):
        # Counters are plain attributes so callers can use ``+=`` on them.
        self.upload = self.download = 0

    def __repr__(self):
        template = "Bytes(upload={}, download={})"
        return template.format(self.upload, self.download)

# Build the set once so each membership test in the loop is O(1) rather than
# a linear scan of list_ip.
private_ips = set(list_ip)

# An IP seen for the first time gets a fresh Bytes() with both counters at 0.
new_list_traffic = defaultdict(Bytes)

# NOTE(review): src_ip is counted as upload and dst_ip as download here,
# whereas the list-based version above stores src_ip totals at index 0
# (download) -- confirm which mapping is intended.
for record in list_traffic:
    src_ip, dst_ip, raw_bytes = record[:3]
    if src_ip in private_ips:
        new_list_traffic[src_ip].upload += int(raw_bytes)
    if dst_ip in private_ips:
        new_list_traffic[dst_ip].download += int(raw_bytes)

# print() with a single argument works on both Python 2 and Python 3.
print(dict(new_list_traffic))
    
answered by 17.05.2017 в 03:18
1

Surely there are better options, but I can think of the following:

from itertools import groupby
from itertools import tee

# Private (client) IPs whose traffic is totalled.
list_ip = ["192.168.2.9", "192.168.2.8", "192.168.2.7", "192.168.2.6"]
# Each record is [src_ip, dst_ip, bytes, packets, ...]; every field is a string.
list_traffic = [
    [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.9', u'10', u'1', u'*', u'*'],
    [u'192.168.2.7', u'74.125.139.124', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    [u'192.168.2.9', u'74.125.139.125', u'10', u'1', u'*', u'*'],
    [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
]

# Build the set once so each membership test is O(1) rather than a linear
# scan of list_ip.
private_ips = set(list_ip)


def _totals_by_ip(ip_index, tag):
    """Return ``[ip, tag, total_bytes]`` rows, sorted by IP.

    Only records whose field at ``ip_index`` is a private IP are counted.
    """
    # groupby only merges adjacent items, so the pairs must be sorted first.
    pairs = sorted(
        (record[ip_index], int(record[2]))
        for record in list_traffic
        if record[ip_index] in private_ips
    )
    return [
        [ip, tag, sum(size for _, size in group)]
        for ip, group in groupby(pairs, key=lambda pair: pair[0])
    ]


# Download rows (private IP is the source) followed by upload rows
# (private IP is the destination).
lista = _totals_by_ip(0, "d") + _totals_by_ip(1, "u")

# Merge both kinds of rows into one [ip, download, upload] entry per IP.
lista_final = []
for ip, group in groupby(sorted(lista), key=lambda entry: entry[0]):
    totals = {"d": 0, "u": 0}
    # One pass over the group fills both counters; a missing side stays 0.
    for _, tag, total in group:
        totals[tag] += total
    lista_final.append([ip, totals["d"], totals["u"]])

print(lista_final)
    
answered by 16.05.2017 в 17:08