Coverage for melissa/server/deep_learning/tensorboard/base_logger.py: 36%

64 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-06-13 15:44 +0200

1import logging 

2from abc import ABC, abstractmethod 

3from typing import Any, Optional, Union, List 

4from pathlib import Path 

5from matplotlib.figure import Figure 

6 

7try: 

8 import pandas as pd 

9 import tensorflow as tf 

10 TB_TO_DF = True 

11except ModuleNotFoundError: 

12 TB_TO_DF = False 

13 

14 

15logger = logging.getLogger(__name__) 

16 

17 

class TensorboardLogger(ABC):
    """Abstract base class for TensorBoard loggers.

    Subclasses must implement `log_scalar` and `log_scalar_dbg`. The remaining
    methods (`log_scalars`, `log_histogram`, `log_figure`, `close`) do nothing
    in this base class and may be overridden.

    ### Parameters
    - **disable** (`bool`): When `True`, accessing `writer` raises `AssertionError`.
    - **debug** (`bool`): Stored as `self.debug`; not used by this base class
      (presumably consulted by subclasses for debug logging -- TODO confirm)."""

    def __init__(self, disable: bool = False, debug: bool = False):

        self.disable: bool = disable
        self.debug: bool = debug
        # Backend writer object; this base class never assigns anything but None.
        self._writer: Any = None

    @property
    def writer(self) -> Any:
        """Returns the underlying writer object.

        ### Raises
        - `AssertionError`: If the logger was created with `disable=True`."""
        # Explicit raise instead of `assert`: assert statements are removed
        # under `python -O`, which would silently hand out the writer of a
        # disabled logger. The exception type is kept for existing callers.
        if self.disable:
            raise AssertionError(
                "TensorBoard writer accessed while logging is disabled."
            )
        return self._writer

    @abstractmethod
    def log_scalar(self, tag: str, scalar_value: Any, step: int):
        """Logs scalar to tensorboard logger.

        ### Parameters
        - **tag** (`str`): Metric tag.
        - **scalar_value** (`Any`): Value to log.
        - **step** (`int`): Step number."""
        pass

    def log_scalars(self, main_tag: str, tag_scalar_dict: dict, step: int):
        """Logs several scalars with multi-tag to tensorboard logger.

        No-op in the base class.

        ### Parameters
        - **main_tag** (`str`): The parent name for the tags.
        - **tag_scalar_dict** (`dict`): Key-value pair storing the tag and corresponding values.
        - **step** (`int`): Step number."""
        pass

    @abstractmethod
    def log_scalar_dbg(self, tag: str, scalar_value: Any, step: int):
        """Logs debugging-related scalar to tensorboard logger.

        ### Parameters
        - **tag** (`str`): Metric tag.
        - **scalar_value** (`Any`): Value to log.
        - **step** (`int`): Step number."""
        pass

    def log_histogram(self, tag: str, values: Any, step: Optional[int] = None):
        """Logs histograms to tensorboard logger.

        No-op in the base class.

        ### Parameters
        - **tag** (`str`): Metric tag.
        - **values** (`Any`): Values to log.
        - **step** (`Optional[int]`): Step number."""
        pass

    def log_figure(
        self,
        tag: str,
        figure: Union[Figure, List[Figure]],
        step: Optional[int] = None,
        close: bool = True,
    ):
        """Render matplotlib figure into an image and add it to summary.

        No-op in the base class.

        ### Parameters
        - **tag** (`str`): Figure identifier.
        - **figure** (`Union[Figure, List[Figure]]`): Figure or a list of figures.
        - **step** (`Optional[int]`): Step number.
        - **close** (`bool`): Flag to automatically close the figure."""
        pass

    def close(self):
        """Flushes and closes tensorboard logger.

        No-op in the base class."""
        pass

87 

88 

def convert_tb_logs_to_df(rank: int) -> None:
    """Converts local TensorBoard data into Pandas DataFrame.

    Scans the `tensorboard` directory (relative to the current working
    directory) for sub-folders whose path contains `gpu_<rank>`, parses every
    file in them whose path contains both `events.out.tfevents` and
    `rank_<rank>`, and saves the concatenated records as a pickle file at
    `./tensorboard/data_rank_<rank>.pkl`.

    If `pandas` or `tensorflow` could not be imported, an error is logged and
    nothing is written. If no records are found (including when the
    `tensorboard` directory does not exist), a warning is logged and nothing
    is written.

    ### Parameters
    - **rank** (`int`): Rank whose event files are converted."""
    import re  # local import so this function does not rely on a new file-level import

    if not TB_TO_DF:
        logger.error(
            "Unable to import dependencies for log. "
            "Please install `pandas` and `tensorflow-cpu`."
        )
        return

    columns_order = ["wall_time", "name", "step", "value"]

    def parse_tfevent(tfevent):
        # Only the first summary value of the event is recorded, and only its
        # `simple_value` field is read.
        first_value = tfevent.summary.value[0]
        return {
            "wall_time": tfevent.wall_time,
            "name": first_value.tag,
            "step": tfevent.step,
            "value": float(first_value.simple_value),
        }

    def convert_tfevent(filepath):
        records = []
        for raw_record in tf.data.TFRecordDataset(filepath):
            event = tf.compat.v1.Event.FromString(raw_record.numpy())
            if event.summary and len(event.summary.value) > 0:
                records.append(parse_tfevent(event))
        return pd.DataFrame(records)

    # A plain substring test would let rank 1 also match "gpu_10" / "rank_10";
    # the negative lookahead rejects a match that is followed by another digit.
    rank_text = re.escape(str(rank))
    gpu_pattern = re.compile(rf"gpu_{rank_text}(?!\d)")
    rank_pattern = re.compile(rf"rank_{rank_text}(?!\d)")

    out = []
    root = Path("tensorboard")
    # A missing directory is treated like "no event files" instead of raising.
    folders = root.iterdir() if root.is_dir() else ()
    for folder in folders:
        # Skip regular files that merely match the name (iterdir() would fail on them).
        if not folder.is_dir() or not gpu_pattern.search(str(folder)):
            continue
        for file in folder.iterdir():
            file_path = str(file)
            if "events.out.tfevents" not in file_path:
                continue
            if not rank_pattern.search(file_path):
                continue
            logger.info("Parsing %s", file_path)
            frame = convert_tfevent(file_path)
            # A file without summary records yields a column-less frame; keeping
            # it would make the column selection below fail when every parsed
            # file is empty.
            if not frame.empty:
                out.append(frame)

    if out:
        all_df = pd.concat(out)[columns_order]
        all_df.reset_index(drop=True, inplace=True)
        all_df.to_pickle(f"./tensorboard/data_rank_{rank}.pkl")
    else:
        logger.warning("No valid TensorBoard event files found.")