How to add arrays of tuples in Python with respect to index?

0

I want to add two arrays, follow_dismiss and display of differente size that comes from SQL queries that have a value and an index on the index that comes from an sql query:

cur.execute("""-- nombre de fois ou l'utilisateur fait ce qu'on attend de lui par utilisateur
SELECT COUNT (swipe.state),swipe.subscriber_id FROM swipe
  WHERE swipe.state= 2 OR swipe.state = 3 or swipe.state=5
  OR swipe.state =6 or swipe.state=8 or swipe.state=9
    GROUP BY swipe.subscriber_id
      ORDER BY swipe.subscriber_id;""")
follow_dismiss = cur.fetchall()
print "follow_dismiss"

and

    cur.execute("""-- nombre de fois ou l'on propose une pub a l'utilisateur par utilisateur
SELECT COUNT (swipe.state),swipe.subscriber_id FROM swipe
  WHERE swipe.state= 1 OR swipe.state = 4 or swipe.state=7
    GROUP BY swipe.subscriber_id
      ORDER BY swipe.subscriber_id;""")

dan:

follow_dismiss
[(409L, 13), (14L, 54), (59L, 68), (6L, 150), (32L, 152), (62L, 156), (59L, 158), (120L, 160), (53L, 161), (150L, 162), (3L, 169), (1L, 171), (60L, 172), (1L, 177), (126L, 179), (41L, 185), (239L, 189), (163L, 190), (11L, 216), (42L, 223), (1L, 272), (2L, 286), (5L, 289), (1L, 292), (2L, 294), (4L, 296)]
display
[(384L, 13), (6L, 54), (300L, 68), (1L, 150), (8L, 152), (33L, 156), (30L, 158), (24L, 160), (5L, 161), (233L, 162), (1L, 171), (65L, 172), (21L, 179), (20L, 185), (86L, 189), (171L, 190), (2L, 216), (29L, 223), (2L, 276), (3L, 286), (1L, 296)]

I want to get:

[(793,13), (20,54), ...]

To what this is what I have tried:

follow_dismiss=[(409L, 13), (14L, 54), (59L, 68), (6L, 150), (32L, 152), (62L, 156), (59L, 158), (120L, 160), (53L, 161), (150L, 162), (3L, 169), (1L, 171), (60L, 172), (1L, 177), (126L, 179), (41L, 185), (239L, 189), (163L, 190), (11L, 216), (42L, 223), (1L, 272), (2L, 286), (5L, 289), (1L, 292), (2L, 294), (4L, 296)]
display=[(384L, 13), (6L, 54), (300L, 68), (1L, 150), (8L, 152), (33L, 156), (30L, 158), (24L, 160), (5L, 161), (233L, 162), (1L, 171), (65L, 172), (21L, 179), (20L, 185), (86L, 189), (171L, 190), (2L, 216), (29L, 223), (2L, 276), (3L, 286), (1L, 296)]

def column_sums(a, b):
    result = []
    for i in range(len(a)):
        for j in range(len(b)):  
        if a[i][2]=b[j][2]
            result[i][1] =a[i][1]+b[j][1] 
        else if a[i][2]<b[j][2]
            result[i][1]=a[i][1]
            result[i][2]= a[j][2]
            j = j-1 # para quedar
        else if a[i][2]>b[j][2]
            result[i][1]= b[j][1]
            result[i][2]= b[j][2]
            i = i-1 # para quedar

a = colums_sums(follow_dismiss,display)
print a 

But he tells me:

./testPython.py: line 1: syntax error near unexpected token '('
./testPython.py: line 1: 'follow_dismiss=[(409L, 13), (14L, 54), (59L, 68), (6L, 150), (32L, 152), (62L, 156), (59L, 158), (120L, 160), (53L, 161), (150L, 162), (3L, 169), (1L, 171), (60L, 172), (1L, 177), (126L, 179), (41L, 185), (239L, 189), (163L, 190), (11L, 216), (42L, 223), (1L, 272), (2L, 286), (5L, 289), (1L, 292), (2L, 294), (4L, 296)]'

Update 23/05/2017

He did with the help of FJSevilla's response:

    cur.execute("""-- nombre de fois ou l'utilisateur fait ce qu'on attend de lui par utilisateur
    SELECT COUNT (swipe.state),swipe.subscriber_id FROM swipe
      WHERE swipe.state= 2 OR swipe.state = 3 or swipe.state=5
      OR swipe.state =6 or swipe.state=8 or swipe.state=9
        GROUP BY swipe.subscriber_id
          ORDER BY swipe.subscriber_id;""")
    follow_dismiss_sql = cur.fetchall()

    cur.execute("""-- nombre de fois ou l'on propose une pub a l'utilisateur par utilisateur
SELECT COUNT (swipe.state),swipe.subscriber_id FROM swipe
  WHERE swipe.state= 1 OR swipe.state = 4 or swipe.state=7
    GROUP BY swipe.subscriber_id
      ORDER BY swipe.subscriber_id;""")
    display_sql = cur.fetchall()
    follow_dismiss = np.array(follow_dismiss_sql)
    display = np.array(display_sql)

    print follow_dismiss_sql
    print display_sql

    df = pd.DataFrame(follow_dismiss+display,  columns = ('counts',  'id') )
    suma = df.groupby("id").sum()
    out = pd.np.column_stack((suma.counts.values, suma.index.values))
    print(out)


df = pd.DataFrame(follow_dismiss+display,  columns = ('counts',  'id') )
suma = df.groupby("id").sum()
out = pd.np.column_stack((suma.counts.values, suma.index.values))
print(out)

But he tells me that:

df = pd.DataFrame(follow_dismiss+display,  columns = ('counts',  'id') )
ValueError: operands could not be broadcast together with shapes (26,2) (21,2) 

But it works when he made the answer of FJSevilla ...

    
asked by ThePassenger 22.05.2017 в 16:39
source

1 answer

2

Using Pandas with DataFrame.gropby the grouping is very simple :

import pandas as pd

follow_dismiss= [(409, 13), (14, 54), (59, 68), (6, 150), (32, 152), (62, 156), (59, 158), (120, 160), (53, 161), (150, 162), (3, 169), (1, 171), (60, 172), (1, 177), (126, 179), (41, 185), (239, 189), (163, 190), (11, 216), (42, 223), (1, 272), (2, 286), (5, 289), (1, 292), (2, 294), (4, 296)]
display= [(384, 13), (6, 54), (300, 68), (1, 150), (8, 152), (33, 156), (30, 158), (24, 160), (5, 161), (233, 162), (1, 171), (65, 172), (21, 179), (20, 185), (86, 189), (171, 190), (2, 216), (29, 223), (2, 276), (3, 286), (1, 296)]

#Creamos un dataframe con las dos columnas
df = pd.DataFrame(follow_dismiss+display,  columns = ('counts',  'id') )
#Agrupamos por id sumando el resto de columnas
suma = df.groupby("id").sum()

#Podemos seguir trabajando sobre el DataFrame pero si queremos la salida como array NumPy basta con:
out = pd.np.column_stack((suma.counts.values, suma.index.values))
print(out)

Edit:

If the input data are arrays of NumPy we can not do follow_dismiss + display because this what it does is try to add element by element both arrays, not concatenate them. For this we use numpy.concatenate() :

import pandas as pd
import numpy as np

follow_dismiss= np.array([(409, 13), (14, 54), (59, 68), (6, 150), (32, 152), (62, 156), (59, 158), (120, 160), (53, 161), (150, 162), (3, 169), (1, 171), (60, 172), (1, 177), (126, 179), (41, 185), (239, 189), (163, 190), (11, 216), (42, 223), (1, 272), (2, 286), (5, 289), (1, 292), (2, 294), (4, 296)])
display= np.array([(384, 13), (6, 54), (300, 68), (1, 150), (8, 152), (33, 156), (30, 158), (24, 160), (5, 161), (233, 162), (1, 171), (65, 172), (21, 179), (20, 185), (86, 189), (171, 190), (2, 216), (29, 223), (2, 276), (3, 286), (1, 296)])

df = pd.DataFrame(np.concatenate((follow_dismiss, display),axis=0),  columns = ('counts',  'id') )
suma = df.groupby("id").sum()

out = pd.np.column_stack((suma.counts.values, suma.index.values))
print(out)

Exit:

array([[793,  13],
       [ 20,  54],
       [359,  68],
       [  7, 150],
       [ 40, 152],
       [ 95, 156],
       [ 89, 158],
       [144, 160],
       [ 58, 161],
       [383, 162],
       [  3, 169],
       [  2, 171],
       [125, 172],
       [  1, 177],
       [147, 179],
       [ 61, 185],
       [325, 189],
       [334, 190],
       [ 13, 216],
       [ 71, 223],
       [  1, 272],
       [  2, 276],
       [  5, 286],
       [  5, 289],
       [  1, 292],
       [  2, 294],
       [  5, 296]], dtype=int64)
    
answered by 22.05.2017 / 21:24
source