# Compare per-directory sizes from two tab-separated listings and plot the largest changes.
2import datetime
3import os
4
5import pandas as pd
6from io import StringIO
7
8import plotly.express as px
9
10import plotly.graph_objects as go
11
12# pd.DataFrame({'A': [1, 2, 3]})
13# # print(f'test: {pd.DataFrame}')
14#
15# index = pd.date_range("1/1/2000", periods=8)
16#
17# print(index)
18
19
class FileSize:
    """Group a size table with the file it was read from.

    Attributes:
        df: The table itself (``parse`` stores a pandas DataFrame here).
        filename: Name of the file the table came from.
        date: Timestamp associated with that file.
    """

    def __init__(self, df, filename, date):
        # Plain storage only: no validation or copying is performed.
        self.df, self.filename, self.date = df, filename, date
25
26
def parse(file):
    """Load one size listing and tag it with its file name and timestamp.

    The file is read as tab-separated text with two columns, ``size`` and
    ``directory``. Rows whose ``size`` is missing or is not made only of
    numeric characters are dropped, the remaining sizes are converted to
    ``int64`` and the rows are sorted by ascending size. Two columns are
    then added: ``filename`` (base name of ``file``) and ``date``.

    Args:
        file: Path of the listing. Its base name must look like
            ``res_YYYY-MM-DD_HH-MM-SS.txt``; the timestamp is read from it.

    Returns:
        A ``FileSize`` holding the cleaned DataFrame, the base name and the
        parsed ``datetime``.

    Raises:
        ValueError: If the base name does not match the expected pattern.
    """
    # Force ``size`` to text. Left to type inference, a file containing only
    # numeric sizes yields an integer column, on which string checks fail.
    table = pd.read_csv(file, sep='\t', skipinitialspace=True,
                        names=['size', 'directory'], dtype={'size': str})

    # A missing size is NaN rather than a string, so check the type before
    # the content; ``astype(bool)`` keeps the mask boolean for empty input.
    numeric_rows = table['size'].map(
        lambda value: isinstance(value, str) and value.isnumeric()
    ).astype(bool)
    table = (table.loc[numeric_rows]
             .astype({'size': 'int64'})
             .sort_values('size'))

    filename = os.path.basename(file)
    stamp = filename.removesuffix('.txt').removeprefix('res_')
    date = datetime.datetime.strptime(stamp, '%Y-%m-%d_%H-%M-%S')

    table['filename'] = filename
    table['date'] = date
    return FileSize(table, filename, date)
45
46
def regroupe(f, f2, name):
    """Copy the sizes of ``f2`` into a new column of ``f``, matched by directory.

    ``f.df`` gains a column called ``name`` initialised to 0. For every
    directory present in both tables, the first row of ``f.df`` with that
    directory receives the ``size`` recorded in ``f2.df``; when ``f2.df``
    lists a directory several times, the last occurrence wins. Rows of
    ``f.df`` without a counterpart keep 0.

    The match is done with a single hash-based lookup instead of scanning
    ``f.df`` once per row of ``f2.df``.

    Args:
        f: Object whose ``df`` DataFrame (with a ``directory`` column) is
            modified in place.
        f2: Object whose ``df`` DataFrame provides ``directory`` and ``size``.
        name: Label of the column to create (or overwrite) in ``f.df``.

    Returns:
        None. ``f.df`` is updated in place.
    """
    f.df[name] = 0

    # One size per directory. Missing directories never compare equal to
    # anything, so they are left out; keep='last' reproduces the effect of
    # later rows overwriting earlier ones.
    reference = (f2.df.dropna(subset=['directory'])
                 .drop_duplicates('directory', keep='last')
                 .set_index('directory')['size'])

    directories = f.df['directory']
    # Only the first row of each directory in ``f.df`` is a target.
    targets = (~directories.duplicated(keep='first')
               & directories.isin(reference.index))
    if targets.any():
        # Assign by position so the result does not depend on index alignment.
        f.df.loc[targets, name] = directories[targets].map(reference).to_numpy()
59
60
def supprimeLignesIdentiques(df, diff):
    """Return the rows of ``df`` whose ``size`` and ``size2`` differ enough.

    Args:
        df: DataFrame with ``size`` and ``size2`` columns.
        diff: Tolerance. With 0, every row where the two columns are not
            equal is kept; otherwise only rows whose absolute difference is
            strictly greater than ``diff`` are kept.

    Returns:
        The filtered DataFrame produced by ``DataFrame.query``.
    """
    expression = (
        'size != size2'
        if diff == 0
        else 'abs(size-size2) >' + str(diff) + ' '
    )
    return df.query(expression)
71
72
def main():
    """Compare two size listings and plot the directories that changed most.

    Two listings from the ``dir/`` folder are parsed, the sizes of the
    2022-10-31 listing are attached to the 2022-11-10 one as ``size2``, the
    largest absolute difference is printed, and every directory whose size
    differs by at least the threshold is plotted with three traces: the
    size in each listing and the absolute difference between them.
    """
    data_dir = 'dir/'

    f1 = parse(data_dir + 'res_2022-11-10_15-25-30.txt')
    print(f1.df)

    f2 = parse(data_dir + 'res_2022-10-31_11-24-46.txt')
    print(f2.df)

    regroupe(f1, f2, 'size2')

    print('df modifie', f1.df)

    # Only directories whose size moved by at least this much are plotted.
    diff_threshold = 100_000_000

    diffs = (f1.df['size'] - f1.df['size2']).abs()
    # max(0, ...) reports 0 when there are no rows to compare (an empty
    # column has a NaN maximum, which never compares greater than 0).
    largest_diff = max(0, diffs.max())
    print('max', largest_diff)

    changed = diffs >= diff_threshold
    directories = f1.df.loc[changed, 'directory'].tolist()
    sizes = f1.df.loc[changed, 'size'].tolist()
    previous_sizes = f1.df.loc[changed, 'size2'].tolist()
    size_diffs = diffs[changed].tolist()

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=directories, y=sizes,
                             name='val_' + str(f1.date),
                             line=dict(color='firebrick', width=2)))
    fig.add_trace(go.Scatter(x=directories, y=previous_sizes,
                             name='val_' + str(f2.date),
                             line=dict(color='green', width=2)))
    # The 'diff' trace must show the differences; it previously repeated the
    # second listing's sizes.
    fig.add_trace(go.Scatter(x=directories, y=size_diffs, name='diff',
                             line=dict(color='blue', width=2)))
    fig.show()
117
118
# Run the comparison only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()