Calcul de la taille des répertoires

  1# This is a sample Python script.
  2import datetime
  3import os
  4
  5import pandas as pd
  6from io import StringIO
  7
  8import plotly.express as px
  9
 10import plotly.graph_objects as go
 11
 12# pd.DataFrame({'A': [1, 2, 3]})
 13# # print(f'test: {pd.DataFrame}')
 14#
 15# index = pd.date_range("1/1/2000", periods=8)
 16#
 17# print(index)
 18
 19
 20class FileSize:
 21    def __init__(self, df, filename, date):
 22        self.df = df
 23        self.filename = filename
 24        self.date = date
 25
 26
 27def parse(file):
 28    colnames = ['size', 'directory']
 29    df = pd.read_csv(filepath_or_buffer=file, sep='\t',
 30                     skipinitialspace=True, names=colnames)
 31
 32    df3 = df.query('size.str.isnumeric()')
 33    df3 = df3.astype({'size': 'int64'})
 34    df4 = df3.sort_values('size')
 35    filename = os.path.basename(file)
 36    df4['filename'] = filename
 37    s = filename
 38    s = s.removesuffix('.txt')
 39    s = s.removeprefix('res_')
 40    d = datetime.datetime.strptime(s, '%Y-%m-%d_%H-%M-%S')
 41    df4['date'] = d
 42
 43    f = FileSize(df4, filename, d)
 44    return f
 45
 46
 47def regroupe(f, f2, name):
 48    f.df[name] = 0
 49    for index, row in f2.df.iterrows():
 50        dir0 = row['directory']
 51        tmp = f.df.loc[f.df['directory'] == dir0].index
 52        # print('tmp', tmp)
 53        if not tmp.empty:
 54            # tmp['size2']=row['size']
 55            # tmp.at[0,'size2']=row['size']
 56            # print('trouve', tmp, tmp[0])
 57            # print('tmp', tmp)
 58            f.df.at[tmp[0], name] = row['size']
 59
 60
 61def supprimeLignesIdentiques(df, diff):
 62    # np.where((df['Salary_in_1000']>=100) & (df['Age']< 60) & (df['FT_Team'].str.startswith('S')))
 63    # df.query('Salary_in_1000 >= 100 & Age < 60 & FT_Team.str.startswith("S").values')
 64    if diff == 0:
 65        df3 = df.query('size != size2')
 66    else:
 67        # df3 = df.query('((size-size2) >'+diff+') and ')
 68        # df3 = df.query('(abs(size-size2) >' + str(diff) + ') |  ((size-size2) < -' + str(diff) + ') ')
 69        df3 = df.query('abs(size-size2) >' + str(diff) + ' ')
 70    return df3
 71
 72
 73def main():
 74    dir = 'dir/'
 75
 76    f1 = parse(dir + 'res_2022-11-10_15-25-30.txt')
 77    print(f1.df)
 78
 79    f2 = parse(dir + 'res_2022-10-31_11-24-46.txt')
 80    print(f2.df)
 81
 82    regroupe(f1, f2, 'size2')
 83
 84    print('df modifie', f1.df)
 85
 86    diffMax = 1_000_000
 87    diffMax = 100_000_000
 88
 89    max = 0
 90    listX = []
 91    listY = []
 92    listY2 = []
 93    listYdiff = []
 94    for index, row in f1.df.iterrows():
 95        diff = abs(row['size'] - row['size2'])
 96        if (diff > max):
 97            max = diff
 98        if diff >= diffMax:
 99            listX.append(row['directory'])
100            listY.append(row['size'])
101            listY2.append(row['size2'])
102            listYdiff.append(abs(row['size'] - row['size2']))
103
104    print('max', max)
105
106    # fig = px.line(x=listX, y=listY, title="sample figure")
107    # print(fig)
108    fig = go.Figure()
109    # Create and style traces
110    fig.add_trace(go.Scatter(x=listX, y=listY, name='val_' + str(f1.date),
111                             line=dict(color='firebrick', width=2)))
112    fig.add_trace(go.Scatter(x=listX, y=listY2, name='val_' + str(f2.date),
113                             line=dict(color='green', width=2)))
114    fig.add_trace(go.Scatter(x=listX, y=listY2, name='diff',
115                             line=dict(color='blue', width=2)))
116    fig.show()
117
118
# Script entry point: run the comparison only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()