Python中如何进行二维数组的数据处理?

分析 nginx 的日志,目前已把数据按照需求处理成 [(requesturl1,responsetime1),(requesturl2,responsetime2),...].
现在想要统计下 url 的次数,以及 avg responsetime,并排序.
我能想到的就是 for 循环,感觉有点 low,请教大家一下更好的办法,多谢!

# Sample data: [url, response_time] pairs extracted from the nginx log.
a = [["d", 2], ["c", 5], ["a", 9], ["b", 4], ["b", 2], ["c", 9]]

# Distinct URLs. Set iteration order is arbitrary, so URLs with equal counts
# may come out in any relative order after the final sort.
uniqs = list(set([x[0] for x in a]))

# url -> [hit count, total response time]; `a` is rescanned once per URL.
res = {}
for i in uniqs:
    count = 0
    sumtime = 0
    for j in a:
        if i == j[0]:
            count = count + 1
            sumtime = sumtime + j[1]
    res[i] = [count, sumtime]

# Flatten into (url, count, average response time) tuples.
lists = []
for i in res.keys():
    lists.append((i, res[i][0], res[i][1] / res[i][0]))

# Most frequently requested URLs first.
print(sorted(lists, key=lambda x: x[1], reverse=True))


Python中如何进行二维数组的数据处理?

9 回复

稍微减少了几行
from itertools import groupby
from operator import itemgetter

# Sample data: [url, response_time] pairs.
a = [["d", 2], ["c", 5], ["a", 9], ["b", 4], ["b", 2], ["c", 9]]

# groupby only merges adjacent items, so the data must be sorted by URL first.
a = sorted(a, key=itemgetter(0))
lists = []

for key, group in groupby(a, itemgetter(0)):
    # `group` is a one-shot iterator; materialise the times so they can be
    # both counted and summed.
    time_list = [item[1] for item in group]
    lists.append((key, len(time_list), sum(time_list) / len(time_list)))

# Most frequently requested URLs first.
print(sorted(lists, key=itemgetter(1), reverse=True))


# Core techniques for working with 2-D arrays
import numpy as np

# 1. Build a 2-D array from nested lists
arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# 2. Basic properties
print("原始数组:")
print(arr_2d)
print(f"形状: {arr_2d.shape}")
print(f"维度: {arr_2d.ndim}")

# 3. Element access
print("\n数据访问:")
first_row = arr_2d[0, :]
first_col = arr_2d[:, 0]
print("第一行:", first_row)           # row 0
print("第一列:", first_col)           # column 0
print("元素(1,2):", arr_2d[1][2])    # second row, third column

# 4. Common reductions and transforms
print("\n常用处理:")
transposed = np.transpose(arr_2d)
print("转置:\n", transposed)
row_totals = np.sum(arr_2d, axis=1)
col_totals = np.sum(arr_2d, axis=0)
print("按行求和:", row_totals)
print("按列求和:", col_totals)
print("平均值:", np.mean(arr_2d))
print("最大值:", np.max(arr_2d))

# 5. Boolean-mask filtering
print("\n条件筛选:")
mask = arr_2d > 5
selected = arr_2d[mask]
print("大于5的元素:", selected)

# 6. Matrix arithmetic
print("\n矩阵运算:")
arr2 = np.array([
    [9, 8, 7],
    [6, 5, 4],
    [3, 2, 1],
])
print("矩阵相加:\n", np.add(arr_2d, arr2))
# `@` on two 2-D arrays is the matrix product, same as np.dot here.
print("矩阵相乘:\n", arr_2d @ arr2)

# 7. Reshaping and slicing
print("\n重塑和切片:")
flat = arr_2d.flatten()
print("展平:", flat)
print("重塑为1x9:", np.reshape(arr_2d, (1, 9)))
print("切片[0:2, 1:3]:\n", arr_2d[:2, 1:3])

二维数组处理用numpy最方便,基本操作就这些。

import collections

# Sample data: [url, response_time] pairs.
a = [["d", 2], ["c", 5], ["a", 9], ["b", 4], ["b", 2], ["c", 9]]

# url -> list of response times, keyed in first-seen order.
res = collections.defaultdict(list)
for url, elapsed in a:
    res[url].append(elapsed)

# (url, count, average response time), most frequently requested URLs first.
print(sorted(
    [(key, len(value), sum(value) / len(value)) for key, value in res.items()],
    key=lambda x: x[1],
    reverse=True,
))
试了一下 collections

import pandas as pd

# Sample data: [url, response_time] pairs; columns are labelled 0 and 1.
a = [["d", 2], ["c", 5], ["a", 9], ["b", 4], ["b", 2], ["c", 9]]
d = pd.DataFrame(a)

# One (url, mean response time) pair per group.
# numeric_only=True keeps the string URL column out of the mean, and
# to_numpy() replaces get_values(), which was removed in pandas 1.0.
means = [
    (key, group.mean(numeric_only=True).to_numpy())
    for key, group in d.groupby(0)
]
means

Out:

[('a', array([9.])),
 ('b', array([3.])),
 ('c', array([7.])),
 ('d', array([2.]))]

似乎还能更短, 要用 Python3
from itertools import groupby
from operator import itemgetter

a = [["d", 2], ["c", 5], ["a", 9], ["b", 4], ["b", 2], ["c", 9]]

# `(*group,)` unpacks each groupby iterator into a list so it can be both
# counted and summed; this unpacking target is Python 3 only.
stats = sorted([(key, len(group), sum(item[1] for item in group) / len(group))
    for key, (*group,) in groupby(sorted(a, key=itemgetter(0)), itemgetter(0))], key=itemgetter(1), reverse=True)
print(stats)

请教,pd 这个没有次数统计.如何统计呢,我也在看 pandas

groupby 不就是次数统计了么……你想实现什么的计数? pandas 有 unique 和 nunique 方法。

import pandas as pd

# Sample data: [url, response_time] pairs; columns are labelled 0 and 1.
a = [["d", 2], ["c", 5], ["a", 9], ["b", 4], ["b", 2], ["c", 9]]
df = pd.DataFrame(a)
# Per-URL hit count and mean response time in a single agg call.
df.groupby(0).agg(["count", "mean"])

Out[5]:
      1
  count mean
0
a     1    9
b     2    3
c     2    7
d     1    2

谢谢,还是 pd 最简洁

回到顶部