Version: 1.0
drag_compressor.h
Go to the documentation of this file.
1 // Copyright (c) 2020, the GRAPHGEN contributors, as
2 // shown by the AUTHORS file. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
6 
7 #ifndef GRAPHGEN_DRAG_COMPRESSOR_H_
8 #define GRAPHGEN_DRAG_COMPRESSOR_H_
9 
10 #include <algorithm>
11 #include <iterator>
12 #include <set>
13 #include <unordered_set>
14 
15 #include "conact_tree.h"
16 #include "drag_statistics.h"
17 #include "collect_drag_stats.h"
18 #include "output_generator.h"
19 #include "remove_equal_subtrees.h"
20 #include "forest2dag.h"
21 #include "forest_statistics.h"
22 #include "graph_code_generator.h"
23 
24 /*BinaryDrag<conact>::node* MergeEquivalentTreesRec(BinaryDrag<conact>::node* a, BinaryDrag<conact>::node* b, std::unordered_map<BinaryDrag<conact>::node*, BinaryDrag<conact>::node*> &merged)
25  {
26  auto it = merged.find(a);
27  if (it != end(merged))
28  return it->second;
29 
30  auto n = new BinaryDrag<conact>::node(*a);
31  if (a->isleaf()) {
32  n->data.action &= b->data.action;
33  }
34  else {
35  n->left = MergeEquivalentTreesRec(a->left, b->left, merged);
36  n->right = MergeEquivalentTreesRec(a->right, b->right, merged);
37  }
38  merged[a] = n;
39  return n;
40  }
41 
42  void DeleteTreeRec(BinaryDrag<conact>::node* n, std::unordered_map<BinaryDrag<conact>::node*, bool> &deleted)
43  {
44  auto it = deleted.find(n);
45  if (it != end(deleted))
46  return;
47  deleted[n] = true;
48 
49  if (!n->isleaf()) {
50  DeleteTreeRec(n->left, deleted);
51  DeleteTreeRec(n->right, deleted);
52  }
53  delete n;
54  }*/
55 
56  /*void PrintTreeRec(std::ostream& os, BinaryDrag<conact>::node* n, std::set<BinaryDrag<conact>::node*>& visited, int tab = 0) {
57  os << std::string(tab, '\t');
58 
59  auto it = visited.find(n);
60  if (it != end(visited)) {
61  os << " -> " << n << "\n";
62  return;
63  }
64  visited.insert(n);
65 
66  if (n->isleaf()) {
67  os << ". ";
68  auto a = n->data.actions();
69  copy(begin(a), end(a), std::ostream_iterator<int>(os, ", "));
70  os << "\n";
71  }
72  else {
73  os << n->data.condition << "\n";
74  PrintTreeRec(os, n->left, visited, tab + 1);
75  PrintTreeRec(os, n->right, visited, tab + 1);
76  }
77 
78  }*/
79 
92 private:
93  std::unordered_set<const BinaryDrag<conact>::node*> visited_;
94  bool removed_;
95 public:
97  do {
98  removed_ = false;
99  for (auto& t : bd.roots_) {
101  }
102  } while (removed_);
103  }
104 
106 
107  if (n->isleaf()) {
108  return;
109  }
110 
111  auto& left = n->left;
112  auto& right = n->right;
113  if (left->isleaf() && right->isleaf()) {
114  if (left->data.eq(right->data)) {
115  // In this case the current node becomes a leaf. The action associated to this
116  // leaf will be the intersection of the actions of its children.
117  n->data = left->data;
118  n->data.action &= right->data.action;
119  left = right = nullptr;
120  removed_ = true;
121  return;
122  }
123  }
124 
125  if (visited_.insert(n).second) {
126  MergeSpecialLeavesRec(n->left);
127  MergeSpecialLeavesRec(n->right);
128  }
129  }
130 };
131 
132 class MergeLeaves {
133 private:
134  std::unordered_set<BinaryDrag<conact>::node*> visited_nodes_;
135  std::unordered_set<BinaryDrag<conact>::node*> visited_leaves_;
136  std::unordered_map<BinaryDrag<conact>::node*, std::vector<BinaryDrag<conact>::node*>> parents_;
137  BinaryDrag<conact>& bd_;
138 public:
140  for (auto& t : bd.roots_) {
142  }
143  CompressLeaves();
144  }
145 
146  // Yet another function to populate visited nodes/leaves and parents node data structure
148 
149  if (n->isleaf()) {
150  visited_leaves_.insert(n);
151  return;
152  }
153 
154  if (visited_nodes_.insert(n).second) {
155  parents_[n->left].push_back(n);
156  parents_[n->right].push_back(n);
157 
158  CalculateStatsRec(n->left);
159  CalculateStatsRec(n->right);
160  }
161  }
162 
163  void SerializeVisitedLeaves(std::ostream& os = std::cout) {
164  for (const auto& l : visited_leaves_) {
165  for (const auto& a : l->data.actions()) {
166  os << a;
167  }
168  os << "- " << l->data.next << "\n";
169  }
170  }
171 
172  void CompressLeaves() {
173 
174  std::unordered_set<BinaryDrag<conact>::node*> already_updated; // Store the leaves for which the parents have already been updated
175  // For each actually used leaf
176  for(auto& i : visited_leaves_){
177  // Skip multiple actions leaves
178  if (i->data.actions().size() != 1 || already_updated.find(i) != end(already_updated)){
179  continue;
180  }
181 
182  // Compare the current leaf's action with the actions of all the others leaves
183  // and merge them (updating parents) if there is a non-empty intersection and
184  // the next tree ids are the same
185  for (auto& j : visited_leaves_){
186  if(i == j){
187  continue; // We don't want to compare a leaf with itself
188  }
189 
190  if (already_updated.find(j) != end(already_updated)) {
191  continue; // We don't want to consider the same leaf multiple times
192  }
193 
194  if ((i->data.action & j->data.action) != 0 && i->data.next == j->data.next) {
195  // Merge required!
196 
197  // Add them to the already_updated
198  already_updated.insert(j);
199 
200  // Update j's parents so that they will point to i.
201  for (auto& x : parents_[j]) {
202  if (x->left == j) {
203  x->left = i;
204  }
205  else if (x->right == j) { // This should be always true, but...
206  x->right = i;
207  }
208  else {
209  throw; // Bug catcher
210  }
211  }
212  }
213  //DrawDagOnFile("current_dag", bd_, DrawDagFlags::WITH_NEXT | DrawDagFlags::WITH_ROOT_ID);
214  }
215  }
216  }
217 };
218 
219 
222  NONE = 0,
223  PRINT_STATUS_BAR = 1,
224  IGNORE_LEAVES = 2,
228  SAVE_INTERMEDIATE_RESULTS = 4,
229 )
230 
231 //DEFINE_ENUM_CLASS_OR_OPERATOR(DragCompressorFlags)
232 //DEFINE_ENUM_CLASS_AND_OPERATOR(DragCompressorFlags)
233 
234 // Compress a tree / forest into a DRAG solving equivalences
236 private:
237  bool changes_;
238 
239  bool early_stopping_active_;
240  bool early_stopping_reached_ = false;
241  int iterations_max_;
242  int iterations_left_;
243 
244  size_t progress_counter_ = 0;
245  size_t best_nodes_ = std::numeric_limits<size_t >::max();
246  size_t best_leaves_ = std::numeric_limits<size_t >::max();
247 public:
248 
250  bool print_status_bar = flags & DragCompressorFlags::PRINT_STATUS_BAR;
251  if (print_status_bar) {
252  std::cout << "\r" << progress_counter_ << "\n";
253  }
254  }
255 
256  // BinaryDrag
257  // Iteration is the early stopping criteria and represents the number of "iteration"
258  // after a new optimal is found. -1 means no early stopping and it's the dafult
260  early_stopping_active_ = iterations != -1;
261  iterations_left_ = iterations_max_ = iterations;
262  TLOG("Compressing BinaryDrag",
263  RemoveEqualSubtrees{ bd };
264  do {
265  changes_ = false;
266  FastDragOptimizerRec(bd, flags);
267  bd = best_bd_;
268  ResetIterations();
269  } while (changes_);
270 
271  UpdateProgress(flags);
272  )
273  }
274 
276  iterations_left_ = iterations_max_;
277  early_stopping_reached_ = false;
278  }
279 
280  // LineForestHandler
282  early_stopping_active_ = iterations != -1;
283  iterations_left_ = iterations_max_ = iterations;
284  std::string msg = conf.algorithm_name_ + " - compressing main forest\n";
285  TLOG(msg,
286  RemoveEqualSubtrees{ lfh.f_ };
287  do {
288  changes_ = false;
289  FastDragOptimizerRec(lfh.f_, flags);
290  lfh.f_ = best_bd_;
291  ResetIterations();
292  }while(changes_);
293  UpdateProgress(flags);
294  )
295 
296  int fn = 0;
297  for (auto& f : lfh.end_forests_) {
298  progress_counter_ = 0;
299  ResetIterations();
300  msg = conf.algorithm_name_ + " - compressing end forest #" + std::to_string(fn++) + "\n";
301  TLOG(msg,
302  RemoveEqualSubtrees{ f };
303  ResetBest();
304  do {
305  changes_ = false;
306  FastDragOptimizerRec(f, flags);
307  f = best_bd_;
308  ResetIterations();
309  } while (changes_);
310  UpdateProgress(flags);)
311  }
312  }
313 
314 private:
315 
316  void ResetBest() {
317  best_nodes_ = std::numeric_limits<size_t >::max();
318  best_leaves_ = std::numeric_limits<size_t >::max();
319  }
320 
321  // This class perform the merge of equivalent trees updating links
322  struct MergeEquivalentTreesAndUpdate {
323  std::unordered_set<BinaryDrag<conact>::node*> visited_;
324  std::unordered_map<BinaryDrag<conact>::node*, std::vector<BinaryDrag<conact>::node*>>& parents_;
325 
326  MergeEquivalentTreesAndUpdate(BinaryDrag<conact>::node* a,
328  std::unordered_map<BinaryDrag<conact>::node*, std::vector<BinaryDrag<conact>::node*>>& parents) :
329  parents_{ parents }
330  {
331  MergeEquivalentTreesAndUpdateRec(a, b);
332  }
333 
334  void MergeEquivalentTreesAndUpdateRec(BinaryDrag<conact>::node* a, BinaryDrag<conact>::node* b)
335  {
336  auto it = visited_.find(a);
337  if (it != end(visited_))
338  return;
339  visited_.insert(a);
340 
341  for (auto& x : parents_[b]) {
342  if (x->left == b)
343  x->left = a;
344  else
345  x->right = a;
346  }
347 
348  if (a->isleaf()) {
349  a->data.action &= b->data.action;
350  }
351  else {
352  MergeEquivalentTreesAndUpdateRec(a->left, b->left);
353  MergeEquivalentTreesAndUpdateRec(a->right, b->right);
354  }
355  }
356  };
357 
358  BinaryDrag<conact> best_bd_;
359 
360  void FastDragOptimizerRec(BinaryDrag<conact>& bd, DragCompressorFlags flags)
361  {
362  if (early_stopping_reached_ && early_stopping_active_) {
363  return;
364  }
365 
366  bool print_status_bar = flags & DragCompressorFlags::PRINT_STATUS_BAR;
367  bool ignore_leaves = flags & DragCompressorFlags::IGNORE_LEAVES;
368  bool save_intermediate_results = flags & DragCompressorFlags::SAVE_INTERMEDIATE_RESULTS;
369 
370  // Collect information of trees'/drags' subtrees (as strings) and push
371  // them into a vector so that they are easier to use
372  std::vector<CollectDragStatistics::STreeProp> trees;
373  CollectDragStatistics cds(bd);
374  for (const auto& x : cds.np_) {
375  trees.push_back(x.second);
376  }
377 
378  // For each subtree (with or without considering leaves) ...
379  for (size_t i = 0; i < trees.size(); ) {
380  if (ignore_leaves && trees[i].conditions_ == ".") {
381  trees.erase(begin(trees) + i);
382  continue;
383  }
384  bool eq = false;
385  // ... check all the other subtrees to find equivalences
386  for (size_t j = 0; j < trees.size(); ++j) {
387  if (i != j && trees[i].equivalent(trees[j])) {
388  eq = true;
389  break;
390  }
391  }
392  // If there the current subtree has no equivalent subtrees
393  // remove it from the list.
394  if (!eq) {
395  trees.erase(begin(trees) + i);
396  }
397  else {
398  ++i;
399  }
400  }
401 
402  // Then we order subtrees which have equivalent from the deepest to the least
403  // deep. This step is somehow useless since later all the possible merges are
404  // tested. Ordering the tree is useful to find, in some cases, the best or
405  // pseudo optimal solution earlier, so that when the process does not end
406  // in "good time" we still have a good solution/compression.
407  sort(begin(trees), end(trees), [](const CollectDragStatistics::STreeProp& a, const CollectDragStatistics::STreeProp& b) {
408  return a.conditions_.size() > b.conditions_.size();
409  });
410 
411  // Compare each subtree which has equivalent subtrees with all the others
412  bool no_eq = true;
413  for (size_t i = 0; i < trees.size(); ++i) {
414  bool eq = false;
415  size_t j;
416  for (j = i + 1; j < trees.size(); ++j) {
417  if (trees[i].equivalent(trees[j])) {
418  // Here an equivalence is found!
419  no_eq = false;
420 
421  // Then we need to copy the original (entire) tree
422  // keeping track of some nodes. Indeed, to perform
423  // the link update, we need to know the pointers to
424  // the roots of the equivalent subtrees after the
425  // copy.
426  std::vector<BinaryDrag<conact>::node*> tracked_nodes{ trees[i].n_, trees[j].n_ };
427  BinaryDrag<conact> bd_copy(bd, tracked_nodes);
428 
429  // Re-calculate subtrees/node statistics over the tree
430  // copy since pointers to nodes are changed.
431  CollectDragStatistics cds(bd_copy);
432 
433  // Perform the merge of equivalent trees updating links
434  MergeEquivalentTreesAndUpdate(tracked_nodes[0], tracked_nodes[1], cds.parents_);
435 
436  // Remove equal subtrees inside a BinaryDrag. Is this really
437  // necessary here? Maybe it isn't but performing this operation
438  // here can improve the efficiency of the compression in case
439  // there are actually equal sub-trees.
440  RemoveEqualSubtrees{ bd_copy };
441 
442  // Recursively call the compression function on the current
443  // resulting tree
444  FastDragOptimizerRec(bd_copy, flags);
445  }
446  }
447  }
448 
449  if (no_eq) {
450  // Display a raw progress status if needed
451  ++progress_counter_;
452  if (print_status_bar) {
453  if (progress_counter_ % 1000 == 0) {
454  std::cout << "\r" << progress_counter_ << std::flush;
455  }
456  }
457 
458  if (--iterations_left_ <= 0) {
459  early_stopping_reached_ = true;
460  }
461 
462  BinaryDragStatistics bds(bd);
463  if (bds.Nodes() < best_nodes_) {
464  // New better binary drag found ...
465  changes_ = true;
466  iterations_left_ = iterations_max_;
467 
468  // ... compress the leaves of the current optimal binary drag
469  MergeSpecialLeaves{ bd };
470  MergeLeaves{ bd };
471 
472  // ... and finally update class attributes accordingly
473  BinaryDragStatistics bds(bd);
474  best_nodes_ = bds.Nodes();
475  best_leaves_ = bds.Leaves();
476  best_bd_ = bd;
477 
478  // ... save the current tree if needed
479  if (save_intermediate_results) {
480  DrawDagOnFile("BestDrag" + zerostr(progress_counter_, 10), bd);
481  }
482 
483  // ... print status if needed
484  if (print_status_bar) {
485  std::cout << "\r" << progress_counter_ << " - nodes: " << bds.Nodes() << "; leaves: " << bds.Leaves() << "\n";
486  }
487 
488  }
489  }
490  }
491 };
492 
493 #endif // GRAPHGEN_DRAG_COMPRESSOR_H_
std::vector< node * > roots_
Definition: drag.h:58
Calculates the statistics of a binary drag with one or multiple roots.
void UpdateProgress(DragCompressorFlags flags)
DragCompressor(BinaryDrag< conact > &bd, int iterations=-1, DragCompressorFlags flags=DragCompressorFlags::PRINT_STATUS_BAR|DragCompressorFlags::IGNORE_LEAVES)
DragCompressor(LineForestHandler &lfh, int iterations=-1, DragCompressorFlags flags=DragCompressorFlags::PRINT_STATUS_BAR|DragCompressorFlags::IGNORE_LEAVES)
void SerializeVisitedLeaves(std::ostream &os=std::cout)
void CalculateStatsRec(BinaryDrag< conact >::node *&n)
void CompressLeaves()
MergeLeaves(BinaryDrag< conact > &bd)
void MergeSpecialLeavesRec(BinaryDrag< conact >::node *n)
MergeSpecialLeaves(BinaryDrag< conact > &bd)
DragCompressorFlags
This enum class defines the available flags for the DragCompression class.
@ SAVE_INTERMEDIATE_RESULTS
Whether to delete or not the dot code used to draw the drag.
@ IGNORE_LEAVES
Whether to ignore leaves or not during the compression. Please note that compressing the leaves will ...
@ PRINT_STATUS_BAR
Whether to print a sort of progress bar or not.
bool DrawDagOnFile(const string &base_filename, const BinaryDrag< conact > &t, DrawDagFlags flags)
std::string algorithm_name_
Definition: config_data.h:30
Generates all the forests needed to handle one line of the image.
Definition: forest.h:51
BinaryDrag< conact > f_
Definition: forest.h:92
std::vector< BinaryDrag< conact > > end_forests_
Definition: forest.h:93
This class allows to "remove" equal subtrees from a BinaryDrag.
ConfigData conf
Definition: utilities.cpp:9
#define TLOG(message, instructions)
Definition: utilities.h:93
std::string zerostr(const T &val, size_t n)
Definition: utilities.h:69
#define DEFINE_ENUM_CLASS_FLAGS(class_name,...)
Definition: utilities.h:41