first commit
This commit is contained in:
commit
cd1e91bb99
287 changed files with 86425 additions and 0 deletions
76
rasta_exp/cluster_worker/stats.py
Normal file
76
rasta_exp/cluster_worker/stats.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from tabulate import tabulate
|
||||
import sys
|
||||
|
||||
|
||||
def extract(data, key_list):
    """Return a new dict holding only the entries of *data* named in *key_list*.

    Args:
        data: Subscriptable object (typically a dict parsed from JSON) to
            read values from.
        key_list: Iterable of keys to copy out of ``data``.

    Returns:
        A new dict mapping each key in ``key_list`` to ``data[key]``, in the
        order the keys appear in ``key_list``.

    Raises:
        KeyError: If ``data`` is a dict and one of the requested keys is
            missing from it.
    """
    return {key: data[key] for key in key_list}
|
||||
|
||||
|
||||
# Summary table handed to tabulate(); the first row is the header and one row
# per tool directory is appended afterwards.
tabulate_tools = [["Tool", "Total", "Timeout", "Crash", "Time (s)", "Memory (MB)"]]

if len(sys.argv) < 2:
    # Report the usage error on stderr and exit with a non-zero status so
    # calling scripts can detect the failure. quit() is a helper injected by
    # the site module for interactive sessions and is not guaranteed to exist.
    print("python3 stats.py directory", file=sys.stderr)
    sys.exit(1)

# The rest of the script lists the tool directories relative to the current
# working directory, so move into the directory given on the command line.
print("Going into " + sys.argv[1])
os.chdir(sys.argv[1])
|
||||
|
||||
# Each sub-directory of the working directory is treated as one tool; every
# file inside it is parsed as a JSON report describing one analysed APK.
for tool_dir in os.listdir():
    if not os.path.isdir(tool_dir):
        continue
    print("Processing " + str(tool_dir))

    # Collect one record per report and build the DataFrame once at the end;
    # concatenating inside the loop re-copies every previous row each time.
    records = []
    apk_hashes = []
    for report_name in os.listdir(tool_dir):
        report_path = os.path.join(tool_dir, report_name)
        if not os.path.isfile(report_path):
            # Nested directories cannot be opened as reports; skip them.
            continue
        with open(report_path, "r") as f:
            data = json.load(f)

        # Flatten the run-level fields and the APK metadata into one record.
        record = extract(
            data, ["crashed", "timeout", "user-cpu-time", "max-rss-mem"]
        )
        record.update(
            extract(data["apk"], ["apk_size", "min_sdk", "target_sdk", "max_sdk"])
        )
        records.append(record)
        apk_hashes.append(data["apk"]["sha256"])

    if not records:
        # Without any report the columns below do not exist and the means
        # are undefined, so leave this tool out of the table.
        print("No report found in " + str(tool_dir) + ", skipping", file=sys.stderr)
        continue

    # Rows are indexed by the APK's sha256, one row per report.
    df = pd.DataFrame(records, index=apk_hashes)

    # HACK: force a float dtype so that .mean() works on these columns
    # whatever type the JSON values were parsed as.
    df["user-cpu-time"] = df["user-cpu-time"].astype(float)
    df["max-rss-mem"] = df["max-rss-mem"].astype(float)
    cpu = round(df["user-cpu-time"].mean(), 1)
    # Scale the mean by 10^6 for the "Memory (MB)" column.
    # NOTE(review): the unit of "max-rss-mem" is not visible here; confirm
    # that dividing by 1000**2 really yields megabytes.
    memory = int(df["max-rss-mem"].mean() / (1000**2))
    tabulate_tools.append(
        [tool_dir, len(df), df["timeout"].sum(), df["crashed"].sum(), cpu, memory]
    )

# Print the summary twice: as a plain-text table, then as a LaTeX table.
print(tabulate(tabulate_tools))
print(tabulate(tabulate_tools, tablefmt="latex"))
|
Loading…
Add table
Add a link
Reference in a new issue