Gymbo
symbolic.h
Go to the documentation of this file.
1 
7 #pragma once
8 #include "smt.h"
9 
10 namespace gymbo {
11 
22 inline bool is_target_pc(const std::unordered_set<int> &target_pcs, int pc) {
23  return ((target_pcs.size() == 0) ||
24  (target_pcs.find(-1) != target_pcs.end()) ||
25  (target_pcs.find(pc) != target_pcs.end()));
26 }
27 
40 inline bool explore_further(int maxDepth, int maxSAT, int maxUNSAT) {
41  return maxDepth > 0 && maxSAT > 0 && maxUNSAT > 0;
42 }
43 
58 inline void call_smt_solver(bool &is_sat, SymState &state,
59  std::unordered_map<int, float> &params,
60  GDOptimizer &optimizer, int max_num_trials,
61  bool ignore_memory, bool use_dpll) {
62  if (use_dpll) {
63  smt_dpll_solver(is_sat, state, params, optimizer, max_num_trials,
64  ignore_memory);
65  } else {
66  smt_union_solver(is_sat, state, params, optimizer, max_num_trials,
67  ignore_memory);
68  }
69 }
70 
87 inline void verbose_constraints(int verbose_level,
88  bool is_unknown_path_constraint, bool is_target,
89  bool is_sat, int pc,
90  std::string constraints_str,
91  const SymState &state,
92  const std::unordered_map<int, float> &params) {
93  if ((verbose_level >= 1 && is_unknown_path_constraint && is_target) ||
94  (verbose_level >= 2)) {
95  if (!is_sat) {
96  printf("\x1b[31m");
97  } else {
98  printf("\x1b[32m");
99  }
100  printf("pc=%d, IS_SAT - %d\x1b[39m, %s, params = {", pc, is_sat,
101  constraints_str.c_str());
102  for (auto &p : params) {
103  // ignore concrete variables
104  if (state.mem.find(p.first) != state.mem.end()) {
105  continue;
106  }
107  // only show symbolic variables
108  if (is_integer(p.second)) {
109  printf("%d: %d, ", p.first, (int)p.second);
110  } else {
111  printf("%d: %f, ", p.first, p.second);
112  }
113  }
114  printf("}\n");
115  }
116 }
117 
128 inline void verbose_pre(int verbose_level, int pc, Prog &prog,
129  SymState &state) {
130  if (verbose_level > -1) {
131  printf("pc: %d, ", pc);
132  prog[pc].print();
133  if (verbose_level >= 2) {
134  state.print();
135  }
136  }
137 }
138 
146 inline void verbose_post(int verbose_level) {
147  if (verbose_level >= 2) {
148  printf("---\n");
149  }
150 }
151 
164 inline void symStep(SymState *state, Instr &instr,
165  std::vector<SymState *> &result) {
166  // SymState state = state;
167 
168  switch (instr.instr) {
169  case InstrType::Not: {
170  Sym *w = state->symbolic_stack.back();
171  state->symbolic_stack.pop();
172  state->pc++;
173  state->symbolic_stack.push(Sym(SymType::SNot, w));
174  result.emplace_back(state);
175  break;
176  }
177  case InstrType::Add: {
178  Sym *r = state->symbolic_stack.back();
179  state->symbolic_stack.pop();
180  Sym *l = state->symbolic_stack.back();
181  state->symbolic_stack.pop();
182  state->pc++;
183  state->symbolic_stack.push(Sym(SymType::SAdd, l, r));
184  result.emplace_back(state);
185  break;
186  }
187  case InstrType::Sub: {
188  Sym *r = state->symbolic_stack.back();
189  state->symbolic_stack.pop();
190  Sym *l = state->symbolic_stack.back();
191  state->symbolic_stack.pop();
192  state->pc++;
193  state->symbolic_stack.push(Sym(SymType::SSub, l, r));
194  result.emplace_back(state);
195  break;
196  }
197  case InstrType::Mul: {
198  Sym *r = state->symbolic_stack.back();
199  state->symbolic_stack.pop();
200  Sym *l = state->symbolic_stack.back();
201  state->symbolic_stack.pop();
202  state->pc++;
203  state->symbolic_stack.push(Sym(SymType::SMul, l, r));
204  result.emplace_back(state);
205  break;
206  }
207  case InstrType::And: {
208  Sym *r = state->symbolic_stack.back();
209  state->symbolic_stack.pop();
210  Sym *l = state->symbolic_stack.back();
211  state->symbolic_stack.pop();
212  state->pc++;
213  state->symbolic_stack.push(Sym(SymType::SAnd, l, r));
214  result.emplace_back(state);
215  break;
216  }
217  case InstrType::Or: {
218  Sym *r = state->symbolic_stack.back();
219  state->symbolic_stack.pop();
220  Sym *l = state->symbolic_stack.back();
221  state->symbolic_stack.pop();
222  state->pc++;
223  state->symbolic_stack.push(Sym(SymType::SOr, l, r));
224  result.emplace_back(state);
225  break;
226  }
227  case InstrType::Lt: {
228  Sym *r = state->symbolic_stack.back();
229  state->symbolic_stack.pop();
230  Sym *l = state->symbolic_stack.back();
231  state->symbolic_stack.pop();
232  state->pc++;
233  state->symbolic_stack.push(Sym(SymType::SLt, l, r));
234  result.emplace_back(state);
235  break;
236  }
237  case InstrType::Le: {
238  Sym *r = state->symbolic_stack.back();
239  state->symbolic_stack.pop();
240  Sym *l = state->symbolic_stack.back();
241  state->symbolic_stack.pop();
242  state->pc++;
243  state->symbolic_stack.push(Sym(SymType::SLe, l, r));
244  result.emplace_back(state);
245  break;
246  }
247  case InstrType::Eq: {
248  Sym *r = state->symbolic_stack.back();
249  state->symbolic_stack.pop();
250  Sym *l = state->symbolic_stack.back();
251  state->symbolic_stack.pop();
252  state->pc++;
253  state->symbolic_stack.push(Sym(SymType::SEq, l, r));
254  result.emplace_back(state);
255  break;
256  }
257  case InstrType::Swap: {
258  Sym *x = state->symbolic_stack.back();
259  state->symbolic_stack.pop();
260  Sym *y = state->symbolic_stack.back();
261  state->symbolic_stack.pop();
262  state->pc++;
263  state->symbolic_stack.push(*x);
264  state->symbolic_stack.push(*y);
265  result.emplace_back(state);
266  break;
267  }
268  case InstrType::Store: {
269  Sym *addr = state->symbolic_stack.back();
270  state->symbolic_stack.pop();
271  Sym *w = state->symbolic_stack.back();
272  state->symbolic_stack.pop();
273  if (w->symtype == SymType::SCon) {
274  if (state->mem.find(wordToInt(addr->var_idx)) ==
275  state->mem.end()) {
276  state->mem.emplace(wordToInt(addr->var_idx), w->word);
277  } else {
278  state->mem.at(wordToInt(addr->var_idx)) = w->word;
279  }
280  } else if ((w->symtype == SymType::SAny) &&
281  (state->mem.find(wordToInt(w->var_idx)) !=
282  state->mem.end())) {
283  if (state->mem.find(wordToInt(addr->var_idx)) ==
284  state->mem.end()) {
285  state->mem.emplace(wordToInt(addr->var_idx),
286  state->mem[wordToInt(w->var_idx)]);
287  } else {
288  state->mem.at(wordToInt(addr->var_idx)) =
289  state->mem[wordToInt(w->var_idx)];
290  }
291  } else {
292  if (state->smem.find(wordToInt(addr->var_idx)) ==
293  state->smem.end()) {
294  if (state->smem.find(wordToInt(w->var_idx)) ==
295  state->smem.end()) {
296  state->smem.emplace(wordToInt(addr->var_idx), *w);
297  } else {
298  state->smem.emplace(wordToInt(addr->var_idx),
299  state->smem[wordToInt(w->var_idx)]);
300  }
301  } else {
302  if (state->smem.find(wordToInt(w->var_idx)) ==
303  state->smem.end()) {
304  state->smem.at(wordToInt(addr->var_idx)) = *w;
305  } else {
306  state->smem.at(wordToInt(addr->var_idx)) =
307  state->smem[wordToInt(w->var_idx)];
308  }
309  }
310  }
311  state->pc++;
312  result.emplace_back(state);
313  break;
314  }
315  case InstrType::Load: {
316  Sym *addr = state->symbolic_stack.back();
317  state->symbolic_stack.pop();
318  if (state->smem.find(wordToInt(addr->word)) != state->smem.end()) {
319  state->symbolic_stack.push(state->smem[wordToInt(addr->word)]);
320  } else {
321  state->symbolic_stack.push(Sym(SymType::SAny, addr->word));
322  }
323  state->pc++;
324  result.emplace_back(state);
325  break;
326  }
327  case InstrType::Read: {
328  state->symbolic_stack.push(Sym(SymType::SAny, state->var_cnt));
329  state->pc++;
330  state->var_cnt++;
331  result.emplace_back(state);
332  break;
333  }
334  case InstrType::Push: {
335  state->symbolic_stack.push(Sym(SymType::SCon, instr.word));
336  state->pc++;
337  result.emplace_back(state);
338  break;
339  }
340  case InstrType::Dup: {
341  Sym *w = state->symbolic_stack.back();
342  state->symbolic_stack.pop();
343  state->pc++;
344  state->symbolic_stack.push(*w);
345  state->symbolic_stack.push(*w);
346  result.emplace_back(state);
347  break;
348  }
349  case InstrType::Pop: {
350  state->symbolic_stack.pop();
351  state->pc++;
352  result.emplace_back(state);
353  break;
354  }
355  case InstrType::JmpIf: {
356  Sym *cond = state->symbolic_stack.back()->psimplify(state->mem);
357  state->symbolic_stack.pop();
358  Sym *addr = state->symbolic_stack.back();
359  state->symbolic_stack.pop();
360  if (addr->symtype == SymType::SCon) {
361  SymState *true_state = state->copy();
362  SymState *false_state = state->copy();
363  true_state->pc += wordToInt(addr->word - 2);
364  true_state->path_constraints.emplace_back(*cond);
365  false_state->pc++;
366  false_state->path_constraints.emplace_back(
367  Sym(SymType::SNot, cond));
368  result.emplace_back(true_state);
369  result.emplace_back(false_state);
370  }
371  break;
372  }
373  case InstrType::Jmp: {
374  Sym *addr = state->symbolic_stack.back();
375  state->symbolic_stack.pop();
376  state->pc += wordToInt(addr->word);
377  result.emplace_back(state);
378  break;
379  }
380  case InstrType::Nop: {
381  state->pc++;
382  result.emplace_back(state);
383  break;
384  }
385  case InstrType::Done: {
386  break;
387  }
388  default:
389  fprintf(stderr, "Detect unsupported instruction\n");
390  }
391 }
392 
397 struct BaseExecutor {
400  int maxSAT;
401  int maxUNSAT;
407  bool use_dpll;
411 
430  int max_num_trials = 10, bool ignore_memory = false,
431  bool use_dpll = false, int verbose_level = 0,
432  bool return_trace = false)
433  : optimizer(optimizer),
434  maxSAT(maxSAT),
441 
442  virtual bool solve(bool is_target, int pc, SymState &state) = 0;
443  virtual Trace run(Prog &prog, std::unordered_set<int> &target_pcs,
444  SymState &state, int maxDepth) = 0;
445 };
446 
452 struct SExecutor : public BaseExecutor {
455 
457 
469  bool solve(bool is_target, int pc, SymState &state) {
470  bool is_sat = true;
471  std::string constraints_str = state.toString(false);
472 
473  std::unordered_map<int, float> params = {};
474  initialize_params(params, state, ignore_memory);
475 
476  bool is_unknown_path_constraint = true;
477 
478  if (constraints_cache.find(constraints_str) !=
479  constraints_cache.end()) {
480  is_sat = constraints_cache[constraints_str].first;
481  params = constraints_cache[constraints_str].second;
482  is_unknown_path_constraint = false;
483  } else {
484  call_smt_solver(is_sat, state, params, optimizer, max_num_trials,
486  if (is_sat) {
487  maxSAT--;
488  } else {
489  maxUNSAT--;
490  }
491  constraints_cache.emplace(constraints_str,
492  std::make_pair(is_sat, params));
493  }
494 
495  if (verbose_level >= 1) {
496  verbose_constraints(verbose_level, is_unknown_path_constraint,
497  is_target, is_sat, pc, constraints_str, state,
498  params);
499  }
500 
501  return is_sat;
502  }
503 
519  Trace run(Prog &prog, std::unordered_set<int> &target_pcs, SymState &state,
520  int maxDepth = 256) {
521  int pc = state.pc;
522  bool is_target = is_target_pc(target_pcs, pc);
523  bool is_sat = true;
524 
525  verbose_pre(verbose_level, pc, prog, state);
526 
527  if (state.path_constraints.size() != 0 && is_target) {
528  is_sat = solve(is_target, pc, state);
529  }
531 
532  if ((prog[pc].instr == InstrType::Done) || (!is_sat)) {
533  return Trace(state, {});
534  } else if (explore_further(maxDepth, maxSAT, maxUNSAT)) {
535  Instr instr = prog[pc];
536  std::vector<SymState *> newStates;
537  symStep(&state, instr, newStates);
538  std::vector<Trace> children;
539  for (SymState *newState : newStates) {
540  Trace child = run(prog, target_pcs, *newState, maxDepth - 1);
541  if (return_trace) {
542  children.push_back(child);
543  }
544  }
545  return Trace(state, children);
546  } else {
547  return Trace(state, {});
548  }
549  }
550 };
551 
552 } // namespace gymbo
Class representing an instruction.
Definition: type.h:53
Word32 word
Definition: type.h:56
InstrType instr
Definition: type.h:55
Definition: compiler.h:11
bool explore_further(int maxDepth, int maxSAT, int maxUNSAT)
Checks if further exploration is allowed based on maximum depth and satisfiability conditions.
Definition: symbolic.h:40
int wordToInt(uint32_t word)
Converts a 32-bit word representation to an integer value.
Definition: utils.h:66
void call_smt_solver(bool &is_sat, SymState &state, std::unordered_map< int, float > &params, GDOptimizer &optimizer, int max_num_trials, bool ignore_memory, bool use_dpll)
Calls the SMT solver based on the specified options.
Definition: symbolic.h:58
bool is_integer(float x)
Checks if a float is an integer.
Definition: utils.h:25
void verbose_constraints(int verbose_level, bool is_unknown_path_constraint, bool is_target, bool is_sat, int pc, std::string constraints_str, const SymState &state, const std::unordered_map< int, float > &params)
Prints a verbose for conflicts solving if conditions are met.
Definition: symbolic.h:87
void initialize_params(std::unordered_map< int, float > &params, SymState &state, bool ignore_memory)
Initialize Parameter Values from Memory.
Definition: smt.h:23
void smt_union_solver(bool &is_sat, SymState &state, std::unordered_map< int, float > &params, GDOptimizer &optimizer, int max_num_trials, bool ignore_memory)
SMT Solver with Unified Loss Function.
Definition: smt.h:44
bool is_target_pc(const std::unordered_set< int > &target_pcs, int pc)
Checks if the given program counter (pc) is a target program counter.
Definition: symbolic.h:22
void verbose_pre(int verbose_level, int pc, Prog &prog, SymState &state)
Prints a verbose representation before solving constraints.
Definition: symbolic.h:128
void symStep(SymState *state, Instr &instr, std::vector< SymState * > &result)
Symbolically Execute a Single Instruction of a Program.
Definition: symbolic.h:164
std::vector< Instr > Prog
Alias for a program, represented as a vector of instructions.
Definition: type.h:143
void verbose_post(int verbose_level)
Prints a verbose representation after solving constraints.
Definition: symbolic.h:146
void smt_dpll_solver(bool &is_sat, SymState &state, std::unordered_map< int, float > &params, GDOptimizer &optimizer, int max_num_trials, bool ignore_memory)
SMT Solver using DPLL as its backend.
Definition: smt.h:74
std::unordered_map< std::string, std::pair< bool, std::unordered_map< int, float > >> PathConstraintsTable
Alias for a table of path constraints.
Definition: type.h:1189
Core implementation of gradient-based smt solver.
Represents the base class for symbolic execution engine.
Definition: symbolic.h:397
int max_num_trials
Definition: symbolic.h:402
bool return_trace
Definition: symbolic.h:409
bool ignore_memory
Definition: symbolic.h:405
int maxUNSAT
The maximum number of UNSAT constraints to collect.
Definition: symbolic.h:401
GDOptimizer optimizer
Definition: symbolic.h:398
virtual Trace run(Prog &prog, std::unordered_set< int > &target_pcs, SymState &state, int maxDepth)=0
int maxSAT
The maximum number of SAT constraints to collect.
Definition: symbolic.h:400
int verbose_level
The level of verbosity.
Definition: symbolic.h:404
virtual bool solve(bool is_target, int pc, SymState &state)=0
BaseExecutor(GDOptimizer optimizer, int maxSAT=256, int maxUNSAT=256, int max_num_trials=10, bool ignore_memory=false, bool use_dpll=false, int verbose_level=0, bool return_trace=false)
Constructor for BaseExecutor.
Definition: symbolic.h:429
bool use_dpll
Definition: symbolic.h:407
Gradient Descent Optimizer for Symbolic Path Constraints.
Definition: gd.h:28
Represents a derived class for symbolic execution engine for deterministic programs.
Definition: symbolic.h:452
Trace run(Prog &prog, std::unordered_set< int > &target_pcs, SymState &state, int maxDepth=256)
Symbolically Execute a Program with Gradient Descent Optimization.
Definition: symbolic.h:519
bool solve(bool is_target, int pc, SymState &state)
Solves path constraints and updates the cache.
Definition: symbolic.h:469
PathConstraintsTable constraints_cache
Cache for storing and reusing path constraints.
Definition: symbolic.h:454
Struct representing the symbolic state of the symbolic execution.
Definition: type.h:1042
std::vector< Sym > path_constraints
Definition: type.h:1049
int var_cnt
Definition: type.h:1044
SymState * copy()
Create a copy object.
Definition: type.h:1098
void print() const
Prints a human-readable representation of the symbolic state.
Definition: type.h:1162
Linkedlist< Sym > symbolic_stack
Definition: type.h:1047
int pc
Definition: type.h:1043
SMem smem
Definition: type.h:1046
std::string toString(bool include_memory=true) const
Returns the human-redable string representation of concrete memory, symbolic memory and path constrai...
Definition: type.h:1119
Mem mem
Definition: type.h:1045
Struct representing a symbolic expression.
Definition: type.h:265
SymType symtype
Definition: type.h:266
int var_idx
Definition: type.h:270
Word32 word
Definition: type.h:269
Struct representing a trace in symbolic execution.
Definition: type.h:1210