Approximation of Bit-Vector Operations for BDD-based SMT Solvers
Experimental evaluation

Table of Contents

1 Tools

In the evaluation, we used the following tools

1.1 Boolector

  • in the version that entered SMT Competition 2017
  • using the following arguments from SMT Competition 2017

    -uc --fun:preprop --prop:nprops:10000
    

1.2 CVC4

  • in the version provided with the paper On Solving Quantified Bit-Vectors using Invertibility Conditions (CAV 2018)
  • using the following arguments

    --no-cond-rewrite-quant --cbqi-bv-ineq=eq-boundary --global-negate --no-cbqi-innermost --ext-rew-prep
    

1.3 Z3

  • in the stable version 4.6.0
  • using no additional arguments

1.4 Q3B (vanilla)

1.5 Q3B (with operation approximations)

2 CSV files

All statistics are computed from the BenchExec CSV file results.csv.

2.1 Convert to a correct CSV

First, we need to replace tabs by commas:

sed -i 's/\t/,/g' results.csv

And then we remove the standard BenchExec header rows and replace them by more readable ones.

cat results_header.csv > results_out.csv
tail -n +4 results.csv >> results_out.csv

2.2 Split into benchmark sets

We split results according to the benchmark set and remove the temp file.

cat results_header.csv > data/wintersteiger.csv
grep wintersteiger results_out.csv >> data/wintersteiger.csv
sed -i 's#wintersteiger/fmsd13/##g' data/wintersteiger.csv
sed -i 's#/#,##' data/wintersteiger.csv
#separator is #, so we do not have to escape all backslashes

cat results_header.csv > data/preiner.csv
grep Preiner results_out.csv >> data/preiner.csv
sed -i 's#2017-Preiner/##g' data/preiner.csv
sed -i 's#/#,##' data/preiner.csv

cat results_header.csv > data/heizmann.csv
grep Heizmann results_out.csv >> data/heizmann.csv
sed -i 's#20170501-Heizmann-UltimateAutomizer/#,#g' data/heizmann.csv

rm results_out.csv

3 R environment

3.1 Load necessary libraries

library(dplyr)
library(ggplot2)
library(scales)
library(colorspace)
library(RColorBrewer)
options("scipen"=100, "digits"=0)

3.2 Load all CSV files

res = list()
res$wintersteiger <- read.csv("data/wintersteiger.csv", header=TRUE, stringsAsFactors=FALSE)
res$preiner <- read.csv("data/preiner.csv", header=TRUE, stringsAsFactors=FALSE)
res$heizmann <- read.csv("data/heizmann.csv", header=TRUE, stringsAsFactors=FALSE)

res$wintersteiger[["source"]] <- "wintersteiger"
res$preiner[["source"]] <- "preiner"
res$heizmann[["source"]] <- "heizmann"

configurations = c('boolector', 'cvc4', 'z3', 'q3b', 'q3bOA', 'q3bOAnoOpt')
labels = c(boolector = 'Boolector', cvc4 = 'CVC4', z3 = 'Z3', q3b = 'Q3B', q3bOA = 'Q3B+OA', q3bOAnoOpt = 'Q3B+OA-opt')

We need to modify all times for results other than sat or unsat to have the maximal values of walltime and CPU-time.

cpuTimeout <- 5400

for (i in names(res))
{
  res[[i]][['trivial']] <- TRUE

  for (c in configurations)
  {
    res[[i]][[paste(c, 'solved', sep='.')]] <- res[[i]][[paste(c, 'result', sep='.')]] == "sat" | res[[i]][[paste(c, 'result', sep='.')]] == "unsat"
    res[[i]][[paste(c, 'cputime', sep='.')]][!res[[i]][[paste(c, 'solved', sep='.')]]] <- cpuTimeout
    res[[i]][[paste(c, 'walltime', sep='.')]][!res[[i]][[paste(c, 'solved', sep='.')]]] <- cpuTimeout/3
    res[[i]][['trivial']] <- res[[i]][['trivial']] & res[[i]][[paste(c, 'cputime', sep='.')]] < 0.1
  }
}

res$all <- rbind(res$wintersteiger, rbind(res$preiner, res$heizmann))

3.3 Check correctness of results

We need to check that no two solvers disagreed on the status of a benchmark (i.e. sat vs unsat). The result of the following command should be empty!

res$all %>%
    mutate(sat = ((boolector.result == "sat") | (cvc4.result == "sat") | (z3.result == "sat") |  (q3b.result == "sat") | (q3bOA.result == "sat")),
           unsat = ((boolector.result == "unsat") | (cvc4.result == "unsat") | (z3.result == "unsat") |  (q3b.result == "unsat") | (q3bOA.result == "unsat"))) %>%
    filter(sat & unsat) %>%
    select(benchmark, boolector.result, cvc4.result, z3.result, q3b.result, q3bOA.result)
benchmark boolector.result cvc4.result z3.result q3b.result q3bOA.result

4 Statistics

4.1 Numbers of solved formulas

4.1.1 Helper function to generate tables

getTable <- function(benchmarkSet)
{
  counts <- list()

  for (c in configurations)
  {
    counts[[c]] <- count_(res[[benchmarkSet]], paste(c, 'result', sep='.'))
    colnames(counts[[c]]) <- c('result', c)
  }

  return(Reduce(function(...) merge(..., all=TRUE), counts))
}

4.1.2 Wintersteiger

getTable('wintersteiger')
result boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
OUT OF MEMORY nil nil 7 nil 1 1
sat 66 83 71 94 94 93
timeout 31 19 20 3 2 3
unsat 94 89 93 94 94 94

4.1.3 Preiner

getTable('preiner')
result boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
OUT OF MEMORY nil nil 6 5 1 nil
sat 519 453 475 507 524 524
timeout 66 165 103 241 87 204
unsat 4244 4211 4245 4076 4217 4101

4.1.4 Heizmann

getTable('heizmann')
result boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
OUT OF MEMORY nil nil 3 nil nil nil
sat 17 18 13 19 20 20
timeout 100 6 94 19 17 17
unsat 14 107 21 93 94 94

4.1.5 Total

getTable('all')
result boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
OUT OF MEMORY nil nil 16 5 2 1
sat 602 554 559 620 638 637
timeout 197 190 217 263 106 224
unsat 4352 4407 4359 4263 4405 4289

4.2 Solved formulas in benchmark families

res$all %>%
  group_by(source, family) %>%
  summarise(boolector = sum(boolector.solved),
            cvc4 = sum(cvc4.solved),
            z3 = sum(z3.solved),
            q3b = sum(q3b.solved),
            q3bOA = sum(q3bOA.solved),
            q3bOAnoOpt = sum(q3bOAnoOpt.solved),)
source family boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
heizmann nil 31 125 34 112 114 114
preiner keymaera 4027 3997 4030 3890 4010 3894
preiner psyco 193 191 193 176 180 180
preiner scholl-smt08 319 252 271 291 325 325
preiner tptp 72 73 73 73 73 73
preiner UltimateAutomizer 152 151 153 153 153 153
wintersteiger fixpoint 119 126 110 129 129 128
wintersteiger ranking 41 46 54 59 59 59

4.2.1 Only sat

res$all %>%
  filter(z3.result == 'sat' | boolector.result == 'sat' | cvc4.result == 'sat'  | q3b.result == 'sat' | q3bOA.result == 'sat' ) %>%
  group_by(source, family) %>%
  summarise(boolector = sum(boolector.solved),
            cvc4 = sum(cvc4.solved),
            z3 = sum(z3.solved),
            q3b = sum(q3b.solved),
            q3bOA = sum(q3bOA.solved),
            q3bOAnoOpt = sum(q3bOAnoOpt.solved))
source family boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
heizmann nil 17 18 13 19 20 20
preiner keymaera 108 78 108 104 104 104
preiner psyco 131 129 131 131 131 131
preiner scholl-smt08 248 215 203 239 256 256
preiner tptp 17 17 17 17 17 17
preiner UltimateAutomizer 15 14 16 16 16 16
wintersteiger fixpoint 45 51 36 54 54 53
wintersteiger ranking 21 32 35 40 40 40

4.2.2 Only unsat

res$all %>%
  filter(z3.result == 'unsat' | boolector.result == 'unsat' | cvc4.result == 'unsat' | q3b.result == 'unsat' | q3bOA.result == 'unsat' ) %>%
  group_by(source, family) %>%
  summarise(boolector = sum(boolector.solved),
            cvc4 = sum(cvc4.solved),
            z3 = sum(z3.solved),
            q3b = sum(q3b.solved),
            q3bOA = sum(q3bOA.solved),
            q3bOAnoOpt = sum(q3bOAnoOpt.solved))
source family boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
heizmann nil 14 107 21 93 94 94
preiner keymaera 3919 3919 3922 3786 3906 3790
preiner psyco 62 62 62 45 49 49
preiner scholl-smt08 71 37 68 52 69 69
preiner tptp 55 56 56 56 56 56
preiner UltimateAutomizer 137 137 137 137 137 137
wintersteiger fixpoint 74 75 74 75 75 75
wintersteiger ranking 20 14 19 19 19 19

4.3 Uniquely solved benchmarks

4.3.1 Only by Z3

res$all %>%
  filter(z3.solved & !boolector.solved & !cvc4.solved & !q3b.solved & !q3bOA.solved) %>%
  select(source, family, benchmark) %>%
  group_by(source, family) %>%
  summarise(count = n())
source family count
preiner keymaera 1
preiner scholl-smt08 2

4.3.2 Only by Boolector

res$all %>%
  filter(!z3.solved & boolector.solved & !cvc4.solved & !q3b.solved & !q3bOA.solved) %>%
  select(source, family, benchmark) %>%
  group_by(source, family) %>%
  summarise(count = n())
source family count
preiner keymaera 1
preiner scholl-smt08 11

4.3.3 Only by CVC4

res$all %>%
  filter(!z3.solved & !boolector.solved & cvc4.solved & !q3b.solved & !q3bOA.solved) %>%
  select(source, family, benchmark) %>%
  group_by(source, family) %>%
  summarise(count = n())
source family count
heizmann nil 5
preiner scholl-smt08 1

4.3.4 Only by Q3B with OpApprox

res$all %>%
  filter(!z3.solved & !boolector.solved & !cvc4.solved & !q3b.solved & q3bOA.solved) %>%
  select(source, family, benchmark) %>%
  group_by(source, family) %>%
  summarise(count = n())
source family count
heizmann nil 1
preiner scholl-smt08 3

4.3.5 Only by Q3B with OpApprox (excluding the original Q3B)

res$all %>%
  filter(!z3.solved & !boolector.solved & !cvc4.solved & q3bOA.solved) %>%
  select(source, family, benchmark) %>%
  group_by(source, family) %>%
  summarise(count = n())
source family count
heizmann nil 1
preiner scholl-smt08 13
wintersteiger fixpoint 3
wintersteiger ranking 3

4.3.6 By none of the solvers

res$all %>%
  filter(!z3.solved & !boolector.solved & !cvc4.solved & !q3b.solved & !q3bOA.solved) %>%
  select(source, family, benchmark) %>%
  group_by(source, family) %>%
  summarise(count = n())
source family count
heizmann nil 2
preiner keymaera 4
preiner psyco 1
preiner scholl-smt08 29
wintersteiger fixpoint 2

4.4 Cross comparison

4.4.1 Helper function to generate tables

First we need a function which for given configurations computes a number of benchmarks that the first configuration has solved, but the second one has not.

firstIsBetter <- function(benchmarkSet, c1, c2)
{
  c1Solved <- res[[benchmarkSet]][[paste(c1, 'solved', sep='.')]]
  c2Solved <- res[[benchmarkSet]][[paste(c2, 'solved', sep='.')]]

  onlyC1Solved <- c1Solved & !(c2Solved)
  return(onlyC1Solved)
}

formulasFirstIsBetter <- function(benchmarkSet, c1, c2)
{
  return(res[[benchmarkSet]]$benchmark[firstIsBetter(benchmarkSet, c1, c2)])
}

compareConfigurations <- function(benchmarkSet, c1, c2)
{
  return(sum(firstIsBetter(benchmarkSet, c1, c2)))
}

We can use this function to generate the cross table.

crossTable <- function(benchmarkSet)
{
  results <- c()
  for (c1 in configurations)
  {
    for (c2 in configurations)
    {
      results <- c(results, compareConfigurations(benchmarkSet, c1, c2))
    }
  }

  results.table <- matrix(results, ncol=6,byrow=TRUE)
  colnames(results.table) <- configurations
  rownames(results.table) <- configurations
  out <- as.table(results.table)
  return(out)
}

4.4.2 Wintersteiger

crossTable('wintersteiger')
  boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
boolector 0 8 11 1 1 1
cvc4 20 0 19 0 0 0
z3 15 11 0 1 1 1
q3b 29 16 25 0 0 1
q3bOA 29 16 25 0 0 1
q3bOAnoOpt 28 15 24 0 0 0

4.4.3 Preiner

crossTable('preiner')
  boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
boolector 0 108 58 200 49 165
cvc4 9 0 31 172 36 151
z3 15 87 0 184 37 153
q3b 20 91 47 0 0 0
q3bOA 27 113 58 158 0 116
q3bOAnoOpt 27 112 58 42 0 0

4.4.4 Heizmann

crossTable('heizmann')
  boolector cvc4 z3 q3b q3bOA q3bOAnoOpt
boolector 0 3 7 2 1 1
cvc4 97 0 94 14 14 14
z3 10 3 0 11 10 10
q3b 83 1 89 0 1 1
q3bOA 84 3 90 3 0 0
q3bOAnoOpt 84 3 90 3 0 0

4.5 Formulas, where the result is different

4.5.1 Wintersteiger

formulasFirstIsBetter('wintersteiger', 'q3b', 'q3bOA')
  x
formulasFirstIsBetter('wintersteiger', 'q3bOA', 'q3b')
  x

4.5.2 Preiner

formulasFirstIsBetter('preiner', 'q3b', 'q3bOA')
  x
formulasFirstIsBetter('preiner', 'q3bOA', 'q3b')
  x
1 Ecoli-chemotaxis-node7007.smt2
2 Ecoli-chemotaxis-node7008.smt2
3 Ecoli-chemotaxis-node8284.smt2
4 Ecoli-chemotaxis-node8285.smt2
5 binary_driver-2007-10-09-node10280.smt2
6 binary_driver-2007-10-09-node11384.smt2
7 binary_driver-2007-10-09-node12553.smt2
8 binary_driver-2007-10-09-node13792.smt2
9 binary_driver-2007-10-09-node14896.smt2
10 binary_driver-2007-10-09-node8746.smt2
11 dccs-example-node3897.smt2
12 dccs-example-simple-node3459.smt2
13 dccs-example-simple-node6247.smt2
14 intersection-example-onelane.proof-node11126.smt2
15 intersection-example-onelane.proof-node11136.smt2
16 intersection-example-onelane.proof-node12481.smt2
17 intersection-example-onelane.proof-node12491.smt2
18 intersection-example-onelane.proof-node12962.smt2
19 intersection-example-onelane.proof-node12972.smt2
20 intersection-example-onelane.proof-node13822.smt2
21 intersection-example-onelane.proof-node13832.smt2
22 intersection-example-onelane.proof-node14158.smt2
23 intersection-example-onelane.proof-node14168.smt2
24 intersection-example-onelane.proof-node1469.smt2
25 intersection-example-onelane.proof-node1479.smt2
26 intersection-example-onelane.proof-node16095.smt2
27 intersection-example-onelane.proof-node16105.smt2
28 intersection-example-onelane.proof-node17872.smt2
29 intersection-example-onelane.proof-node17882.smt2
30 intersection-example-onelane.proof-node18278.smt2
31 intersection-example-onelane.proof-node18288.smt2
32 intersection-example-onelane.proof-node19598.smt2
33 intersection-example-onelane.proof-node20269.smt2
34 intersection-example-onelane.proof-node20279.smt2
35 intersection-example-onelane.proof-node21152.smt2
36 intersection-example-onelane.proof-node21162.smt2
37 intersection-example-onelane.proof-node21488.smt2
38 intersection-example-onelane.proof-node21498.smt2
39 intersection-example-onelane.proof-node21762.smt2
40 intersection-example-onelane.proof-node21772.smt2
41 intersection-example-onelane.proof-node23307.smt2
42 intersection-example-onelane.proof-node23317.smt2
43 intersection-example-onelane.proof-node23782.smt2
44 intersection-example-onelane.proof-node24127.smt2
45 intersection-example-onelane.proof-node24137.smt2
46 intersection-example-onelane.proof-node25171.smt2
47 intersection-example-onelane.proof-node25181.smt2
48 intersection-example-onelane.proof-node2524.smt2
49 intersection-example-onelane.proof-node2534.smt2
50 intersection-example-onelane.proof-node26897.smt2
51 intersection-example-onelane.proof-node29803.smt2
52 intersection-example-onelane.proof-node29813.smt2
53 intersection-example-onelane.proof-node30209.smt2
54 intersection-example-onelane.proof-node30219.smt2
55 intersection-example-onelane.proof-node31529.smt2
56 intersection-example-onelane.proof-node32239.smt2
57 intersection-example-onelane.proof-node32249.smt2
58 intersection-example-onelane.proof-node32497.smt2
59 intersection-example-onelane.proof-node32507.smt2
60 intersection-example-onelane.proof-node34026.smt2
61 intersection-example-onelane.proof-node34036.smt2
62 intersection-example-onelane.proof-node35453.smt2
63 intersection-example-onelane.proof-node37008.smt2
64 intersection-example-onelane.proof-node37018.smt2
65 intersection-example-onelane.proof-node38360.smt2
66 intersection-example-onelane.proof-node38370.smt2
67 intersection-example-onelane.proof-node3855.smt2
68 intersection-example-onelane.proof-node3865.smt2
69 intersection-example-onelane.proof-node39715.smt2
70 intersection-example-onelane.proof-node39725.smt2
71 intersection-example-onelane.proof-node40196.smt2
72 intersection-example-onelane.proof-node40206.smt2
73 intersection-example-onelane.proof-node41056.smt2
74 intersection-example-onelane.proof-node41066.smt2
75 intersection-example-onelane.proof-node41392.smt2
76 intersection-example-onelane.proof-node41402.smt2
77 intersection-example-onelane.proof-node4261.smt2
78 intersection-example-onelane.proof-node4271.smt2
79 intersection-example-onelane.proof-node43329.smt2
80 intersection-example-onelane.proof-node43339.smt2
81 intersection-example-onelane.proof-node45106.smt2
82 intersection-example-onelane.proof-node45116.smt2
83 intersection-example-onelane.proof-node45512.smt2
84 intersection-example-onelane.proof-node45522.smt2
85 intersection-example-onelane.proof-node46832.smt2
86 intersection-example-onelane.proof-node47503.smt2
87 intersection-example-onelane.proof-node47513.smt2
88 intersection-example-onelane.proof-node48386.smt2
89 intersection-example-onelane.proof-node48396.smt2
90 intersection-example-onelane.proof-node48722.smt2
91 intersection-example-onelane.proof-node48732.smt2
92 intersection-example-onelane.proof-node48996.smt2
93 intersection-example-onelane.proof-node49006.smt2
94 intersection-example-onelane.proof-node50541.smt2
95 intersection-example-onelane.proof-node50551.smt2
96 intersection-example-onelane.proof-node51016.smt2
97 intersection-example-onelane.proof-node51361.smt2
98 intersection-example-onelane.proof-node51371.smt2
99 intersection-example-onelane.proof-node52405.smt2
100 intersection-example-onelane.proof-node52415.smt2
101 intersection-example-onelane.proof-node53595.smt2
102 intersection-example-onelane.proof-node53901.smt2
103 intersection-example-onelane.proof-node53911.smt2
104 intersection-example-onelane.proof-node54078.smt2
105 intersection-example-onelane.proof-node5581.smt2
106 intersection-example-onelane.proof-node6556.smt2
107 intersection-example-onelane.proof-node6566.smt2
108 intersection-example-onelane.proof-node6814.smt2
109 intersection-example-onelane.proof-node6824.smt2
110 intersection-example-onelane.proof-node8099.smt2
111 intersection-example-onelane.proof-node8109.smt2
112 intersection-example-onelane.proof-node9774.smt2
113 intersection-example-onelane.proof-node9784.smt2
114 vsl.proof-node1351.smt2
115 vsl.proof-node1722.smt2
116 vsl.proof-node2202.smt2
117 vsl.proof-node2735.smt2
118 vsl.proof-node3106.smt2
119 vsli-alert.proof-node2416.smt2
120 vsli.proof-node2282.smt2
121 016.smt2
122 060.smt2
123 183.smt2
124 184.smt2
125 RND/RND_3_12.smt2
126 RND/RND_3_19.smt2
127 RND/RND_3_25.smt2
128 RND/RND_4_19.smt2
129 RND/RND_4_21.smt2
130 RND/RND_4_22.smt2
131 RND/RND_4_30.smt2
132 RND/RND_6_10.smt2
133 RND/RND_6_12.smt2
134 RND/RND_6_13.smt2
135 RND/RND_6_14.smt2
136 RND/RND_6_16.smt2
137 RND/RND_6_17.smt2
138 RND/RND_6_19.smt2
139 RND/RND_6_20.smt2
140 RND/RND_6_21.smt2
141 RND/RND_6_23.smt2
142 RND/RND_6_24.smt2
143 RND/RND_6_27.smt2
144 RND/RND_6_28.smt2
145 RND/RND_6_29.smt2
146 RND/RND_6_30.smt2
147 RND/RND_6_31.smt2
148 RND/RND_6_33.smt2
149 RND/RND_6_35.smt2
150 RND/RND_6_36.smt2
151 RND/RND_6_37.smt2
152 RND/RND_6_8.smt2
153 RNDPRE/RNDPRE_4_24.smt2
154 RNDPRE/RNDPRE_4_32.smt2
155 RNDPRE/RNDPRE_4_35.smt2
156 RNDPRE/RNDPRE_4_55.smt2
157 RNDPRE/RNDPRE_4_64.smt2
158 RNDPRE/RNDPRE_4_65.smt2

4.5.3 Heizmann

formulasFirstIsBetter('heizmann', 'q3b', 'q3bOA')
  x
1 sum02_false-unreach-call_true-no-overflow.i_336.smt2
formulasFirstIsBetter('heizmann', 'q3bOA', 'q3b')
  x
1 sum02_false-unreach-call_true-no-overflow.c_415.smt2
2 sum02_true-unreach-call_true-no-overflow.i_375.smt2
3 sum02_true-unreach-call_true-no-overflow.i_416.smt2

4.6 Differences in time

4.6.1 Helper functions

We define a function, which compares two configurations and returns benchmarks that were faster in the first configuration by at least 0.5 s.

getFaster <- function(benchmarkSet, c1, c2)
{
    firstIsBetter <- (res[[benchmarkSet]][[paste(c1, 'cputime', sep='.')]] + 0.5 < res[[benchmarkSet]][[paste(c2, 'cputime', sep='.')]]) &
        (res[[benchmarkSet]][[paste(c2, 'solved', sep='.')]])
    return(res[[benchmarkSet]][firstIsBetter, c('family', 'benchmark', paste(c1, 'cputime', sep='.'), paste(c2, 'cputime', sep='.'))])
}

getResourceConsumption <- function(benchmarkSet)
{
    d <- res[[benchmarkSet]]
    d[which((d$q3b.result == "sat" & d$q3bOA.result == "sat") | (d$q3b.result == "unsat" & d$q3bOA.result == "unsat")),] %>%
      summarise(q3bCPU = sum(q3b.cputime),
                q3bMEM = sum(q3b.memusage),
                q3bOACPU = sum(q3bOA.cputime),
                q3bOAMEM = sum(q3bOA.memusage))
}

4.6.2 Total time/memory taken

getResourceConsumption('wintersteiger')
q3bCPU q3bMEM q3bOACPU q3bOAMEM
10271.914595016 35347300352 14527.289808636 47733235712
getResourceConsumption('preiner')
q3bCPU q3bMEM q3bOACPU q3bOAMEM
56547.469152472 185058263040 36869.809526722 155845062656
getResourceConsumption('heizmann')
q3bCPU q3bMEM q3bOACPU q3bOAMEM
1829.125165153 17732628480 771.060594065 18478931968

4.6.3 Degraded performance

These are the results where the approximations increase the solving time by more than 0.5 s.

  1. Total count
    nrow(getFaster('all', 'q3b', 'q3bOA'))
    
    x
    115
  2. Wintersteiger
    getFaster('wintersteiger', 'q3b', 'q3bOA')
    
    family benchmark q3b.cputime q3bOA.cputime
    ranking 1394diag_ioctl.c.smt2 671.587307652 1417.905386247
    ranking 1394diag_isochapi.c.smt2 107.42415153 145.690326377
    ranking AVStream_hwsim.cpp.smt2 1.177106004 2.45390292
    ranking audio_ac97_common.cpp.smt2 107.182787838 146.083092278
    ranking audio_ac97_rtstream.cpp.smt2 107.514638853 146.758227597
    ranking audio_ac97_wavepcistream.cpp.smt2 1470.802759781 3322.724922215
    ranking audio_ddksynth_voice.cpp.smt2 0.467101721 5.540378279
    ranking audio_dmusuart_mpu.cpp.smt2 107.488648664 146.569389932
    ranking audio_gfxswap.xp_filter.cpp.smt2 0.851135804 1.558473792
    ranking filesys_cdfs_allocsup.c.smt2 132.626686447 161.650561263
    ranking filesys_fastfat_cachesup.c.smt2 107.181146596 145.880798036
    ranking filesys_fastfat_easup.c.smt2 109.305597157 145.953210713
    ranking general_pcidrv_sys_hw_eeprom.c.smt2 0.173784249 3.002112054
    ranking hid_firefly_app_firefly.cpp.smt2 48.802417474 134.542711733
    ranking hid_hclient_ecdisp.c.smt2 113.192036922 150.542453366
    ranking input_pnpi8042_moudep.c.smt2 108.212822482 156.439056916
    ranking kernel_uagp35_gart.c.smt2 107.280160455 155.555740697
    ranking kmdf_osrusbfx2_exe_dump.c.smt2 0.640004817 8.035077041
    ranking mmedia_gsm610_gsm610.c.smt2 113.079313483 157.687509459
    ranking mmedia_imaadpcm_imaadpcm.c.smt2 14.977591171 23.512965943
    ranking network_irda_miniport_nscirda_comm.c.smt2 0.63599908 6.175644587
    ranking network_ndis_coisdn_TpiParam.c.smt2 111.981431636 156.908094151
    ranking network_ndis_rtlnwifi_extsta_st_aplst.c.smt2 107.853437712 154.784069571
    ranking network_ndis_rtlnwifi_extsta_st_misc.c.smt2 523.67354059 1562.146911173
    ranking network_usbnwifi_mp_util.c.smt2 2.798840558 17.405900201
  3. Preiner
    getFaster('preiner', 'q3b', 'q3bOA')
    
    family benchmark q3b.cputime q3bOA.cputime
    keymaera ETCS-essentials-node3023.smt2 0.761381522 1.453021719
    keymaera binary_driver-2007-10-09-node12552.smt2 3.074012997 4.915266169
    keymaera binary_driver-2007-10-09-node8745.smt2 5.05642805 7.892995259
    keymaera bouncing-ball-simple-node5920.smt2 2.702275935 7.898430651
    keymaera breaking-node2128.smt2 0.797799935 1.503937369
    keymaera intersection-example-onelane.proof-node12517.smt2 1.055366416 1.884650801
    keymaera intersection-example-onelane.proof-node31286.smt2 0.852529758 1.500007313
    keymaera intersection-example-onelane.proof-node39751.smt2 1.056686747 1.895434925
    keymaera intersection-example-onelane.proof-node5338.smt2 0.843131359 1.492254846
    keymaera safety-lemma-node13046.smt2 1.664440236 3.297497288
    keymaera train_goal6-node8607.smt2 1.628834489 3.262541088
    keymaera vsli-alert.proof-node2442.smt2 43.786972111 87.403758318
    keymaera vsli.proof-node2308.smt2 49.174914674 106.276966507
    psyco 005.smt2 1.180574684 18.092380629
    psyco 006.smt2 1.109374699 17.884192677
    psyco 036.smt2 330.28922678 333.358735238
    psyco 037.smt2 324.149088401 329.421187357
    psyco 079.smt2 723.919693039 736.108938808
    psyco 080.smt2 685.342760807 707.018101909
    psyco 083.smt2 676.556121597 726.305845114
    psyco 084.smt2 713.627832156 720.153448149
    psyco 115.smt2 2.113850465 3.73514407
    psyco 116.smt2 2.12203046 32.312203297
    psyco 138.smt2 157.29214281 162.47832241
    psyco 143.smt2 0.339719648 0.903540082
    psyco 153.smt2 0.710333882 1.777799322
    psyco 154.smt2 0.686893367 2.016368237
    psyco 159.smt2 1.373608934 8.626490044
    psyco 170.smt2 2898.514730796 2927.011825948
    psyco 172.smt2 3.407084539 6.737689669
    psyco 173.smt2 3.395646074 40.100174375
    scholl-smt08 RND/RND_3_13.smt2 160.775761375 164.006917485
    scholl-smt08 RND/RND_3_2.smt2 211.921715074 370.427616215
    scholl-smt08 RND/RND_3_23.smt2 11.440463161 17.779460724
    scholl-smt08 RND/RND_3_26.smt2 38.51347549 64.442988046
    scholl-smt08 RND/RND_3_3.smt2 81.823767675 121.484194662
    scholl-smt08 RND/RND_3_4.smt2 40.988748903 57.68242831
    scholl-smt08 RND/RND_3_6.smt2 4.447337029 5.056113461
    scholl-smt08 RND/RND_4_1.smt2 1358.110785151 4596.355982227
    scholl-smt08 RND/RND_4_10.smt2 23.57007602 31.78109152
    scholl-smt08 RND/RND_4_17.smt2 16.781952943 20.902500698
    scholl-smt08 RND/RND_4_5.smt2 455.790696526 609.616239804
    scholl-smt08 RND/RND_4_7.smt2 87.283331007 89.21377268
    scholl-smt08 RND/RND_6_1.smt2 260.370919081 344.469927938
    scholl-smt08 RNDPRE/RNDPRE_3_1.smt2 2.8637689 3.506070649
    scholl-smt08 RNDPRE/RNDPRE_3_11.smt2 22.353424387 33.376241487
    scholl-smt08 RNDPRE/RNDPRE_3_12.smt2 80.83653691 94.851198525
    scholl-smt08 RNDPRE/RNDPRE_3_14.smt2 2.048917488 2.716594188
    scholl-smt08 RNDPRE/RNDPRE_3_18.smt2 23.409304934 35.633548173
    scholl-smt08 RNDPRE/RNDPRE_3_20.smt2 4.077024344 4.797727861
    scholl-smt08 RNDPRE/RNDPRE_3_21.smt2 80.763015609 93.066239118
    scholl-smt08 RNDPRE/RNDPRE_3_22.smt2 1165.82234427 1338.829963454
    scholl-smt08 RNDPRE/RNDPRE_3_24.smt2 10.191486414 11.635671919
    scholl-smt08 RNDPRE/RNDPRE_3_31.smt2 63.863727399 68.420517213
    scholl-smt08 RNDPRE/RNDPRE_3_39.smt2 57.550011084 60.737275742
    scholl-smt08 RNDPRE/RNDPRE_3_41.smt2 56.980588568 62.139511867
    scholl-smt08 RNDPRE/RNDPRE_3_44.smt2 48.294877044 67.466062472
    scholl-smt08 RNDPRE/RNDPRE_3_45.smt2 2416.317217598 3767.803495442
    scholl-smt08 RNDPRE/RNDPRE_3_49.smt2 36.455572791 43.328566235
    scholl-smt08 RNDPRE/RNDPRE_3_51.smt2 185.475250077 215.754833315
    scholl-smt08 RNDPRE/RNDPRE_3_53.smt2 93.477684338 116.191977579
    scholl-smt08 RNDPRE/RNDPRE_3_59.smt2 42.526751455 49.431820069
    scholl-smt08 RNDPRE/RNDPRE_3_6.smt2 2.511326697 3.655771737
    scholl-smt08 RNDPRE/RNDPRE_4_33.smt2 263.254385614 281.938122092
    scholl-smt08 RNDPRE/RNDPRE_4_38.smt2 340.824356162 743.455711418
    scholl-smt08 RNDPRE/RNDPRE_4_4.smt2 795.058177052 1080.018525106
    scholl-smt08 RNDPRE/RNDPRE_4_40.smt2 3053.529413943 3612.426750411
    scholl-smt08 RNDPRE/RNDPRE_4_57.smt2 44.734779512 45.340850489
    scholl-smt08 RNDPRE/RNDPRE_4_8.smt2 123.110106248 169.895047003
    scholl-smt08 RNDPRE/RNDPRE_4_9.smt2 141.150958497 173.825892858
  4. Heizmann
    getFaster('heizmann', 'q3b', 'q3bOA')
    
    family benchmark q3b.cputime q3bOA.cputime
    nil gcd_2_true-unreach-call_true-no-overflow.i_914.smt2 3.114726554 4.101953446
    nil gcd_2_true-unreach-call_true-no-overflow.i_921.smt2 3.116258075 4.055646007
    nil jain_4_true-unreach-call_true-no-overflow.i_198.smt2 0.225999084 1.21812294
    nil jain_4_true-unreach-call_true-no-overflow.i_200.smt2 0.25686148 1.253770278
    nil jain_4_true-unreach-call_true-no-overflow.i_225.smt2 2.846460092 4.131623695
    nil jain_4_true-unreach-call_true-no-overflow.i_228.smt2 0.237294655 1.35334369
    nil jain_4_true-unreach-call_true-no-overflow.i_341.smt2 0.200606218 0.862380401
    nil jain_6_true-unreach-call_true-no-overflow.i_198.smt2 0.254291489 1.508441629
    nil jain_6_true-unreach-call_true-no-overflow.i_200.smt2 0.259098131 1.556182019
    nil jain_6_true-unreach-call_true-no-overflow.i_225.smt2 2.931025827 4.986284754
    nil jain_6_true-unreach-call_true-no-overflow.i_230.smt2 0.262546205 1.640234998
    nil jain_6_true-unreach-call_true-no-overflow.i_339.smt2 0.225196145 1.23780286
    nil jain_7_true-unreach-call_true-no-overflow.i_215.smt2 0.293578357 20.900286224
    nil jain_7_true-unreach-call_true-no-overflow.i_217.smt2 0.301615327 20.983302616
    nil jain_7_true-unreach-call_true-no-overflow.i_242.smt2 0.308568166 31.420537673
    nil jain_7_true-unreach-call_true-no-overflow.i_245.smt2 0.283402205 20.645164208
    nil jain_7_true-unreach-call_true-no-overflow.i_259.smt2 0.157248987 6.999356617
    nil jain_7_true-unreach-call_true-no-overflow.i_262.smt2 0.302941701 20.872302902
    nil jain_7_true-unreach-call_true-no-overflow.i_382.smt2 0.247529518 10.186497986
    nil jain_7_true-unreach-call_true-no-overflow.i_61.smt2 0.16028079 6.93921993

4.6.4 Improved performance

These are the results where the approximations decrease the solving time by more than 0.5 s.

  1. Total count
    nrow(getFaster('all', 'q3bOA', 'q3b'))
    
    x
    96
  2. Wintersteiger
    getFaster('wintersteiger', 'q3bOA', 'q3b')
    
    family benchmark q3bOA.cputime q3b.cputime
    fixpoint sdlx-fixpoint-7.smt2 636.680779933 645.128950166
    fixpoint sdlx-fixpoint-8.smt2 5346.053308338 5381.851551904
  3. Preiner
    getFaster('preiner', 'q3bOA', 'q3b')
    
    family benchmark q3bOA.cputime q3b.cputime
    keymaera binary_driver-2007-10-09-node10279.smt2 2.487087945 3.151799603
    keymaera binary_driver-2007-10-09-node13791.smt2 2.473819409 3.104345336
    keymaera safety-lemma-node14050.smt2 1.607410994 5.306416397
    keymaera safety-lemma-node9082.smt2 17.351520903 20.241532654
    keymaera train_goal1-node5682.smt2 1.600008342 5.295491251
    keymaera train_goal3-node5717.smt2 17.877822059 21.210864749
    psyco 035.smt2 324.106964028 377.346064139
    psyco 059.smt2 1.156345407 2436.124622957
    psyco 078.smt2 697.445739388 700.323267796
    psyco 081.smt2 729.281808289 730.011129883
    psyco 082.smt2 695.503608239 731.548550458
    psyco 085.smt2 714.349296492 726.842541605
    psyco 086.smt2 716.430426521 727.375572516
    psyco 087.smt2 714.251385993 726.47276434
    psyco 088.smt2 701.917221941 731.765957719
    psyco 124.smt2 1.395648821 2.377621553
    psyco 155.smt2 0.45303503 2.529845927
    psyco 168.smt2 1171.482072847 1173.113759855
    psyco 195.smt2 100.771228139 101.59982281
    scholl-smt08 RND/RND_3_1.smt2 1.889658294 5.811979496
    scholl-smt08 RND/RND_3_11.smt2 5.429106369 349.341929162
    scholl-smt08 RND/RND_3_14.smt2 17.897407595 623.609974835
    scholl-smt08 RND/RND_3_15.smt2 0.770786603 6.461161507
    scholl-smt08 RND/RND_3_16.smt2 4.545652793 37.449699345
    scholl-smt08 RND/RND_3_17.smt2 11.241660131 14.752083302
    scholl-smt08 RND/RND_3_21.smt2 10.772769046 16.427674641
    scholl-smt08 RND/RND_3_22.smt2 85.696575773 3215.819319923
    scholl-smt08 RND/RND_3_27.smt2 5.506705 7.966480083
    scholl-smt08 RND/RND_3_29.smt2 13.103730901 138.46411448
    scholl-smt08 RND/RND_3_9.smt2 0.94652083 8.436300399
    scholl-smt08 RND/RND_4_11.smt2 8.563307467 84.37104354
    scholl-smt08 RND/RND_4_12.smt2 9.53956596 4235.859544771
    scholl-smt08 RND/RND_4_15.smt2 30.134178802 32.983549076
    scholl-smt08 RND/RND_4_23.smt2 31.566834216 287.756033364
    scholl-smt08 RND/RND_4_24.smt2 190.757992528 1322.358761372
    scholl-smt08 RND/RND_4_25.smt2 167.120237002 333.547240883
    scholl-smt08 RND/RND_4_28.smt2 192.878609488 216.088515036
    scholl-smt08 RND/RND_4_29.smt2 35.178766725 1790.508208779
    scholl-smt08 RND/RND_4_4.smt2 1045.328163469 1057.006101041
    scholl-smt08 RND/RND_4_6.smt2 2.810378875 18.603396187
    scholl-smt08 RND/RND_6_18.smt2 342.399324654 1782.288360113
    scholl-smt08 RND/RND_6_3.smt2 0.649935062 1.675127776
    scholl-smt08 RND/RND_6_4.smt2 1.133371754 62.324435998
    scholl-smt08 RND/RND_6_5.smt2 5.200997536 22.330153093
    scholl-smt08 RND/RND_6_6.smt2 5.270796785 1121.119919961
    scholl-smt08 RND/RND_6_7.smt2 4.998338076 176.530199489
    scholl-smt08 RND/RND_6_9.smt2 1.769435082 17.49054758
    scholl-smt08 RNDPRE/RNDPRE_3_15.smt2 14.666402256 15.391217505
    scholl-smt08 RNDPRE/RNDPRE_3_16.smt2 14.206145704 15.756987725
    scholl-smt08 RNDPRE/RNDPRE_3_17.smt2 5.23517539 33.649540902
    scholl-smt08 RNDPRE/RNDPRE_3_26.smt2 2.588325318 35.636075564
    scholl-smt08 RNDPRE/RNDPRE_3_28.smt2 4.072746926 5.417138591
    scholl-smt08 RNDPRE/RNDPRE_3_29.smt2 3.874189601 4.671661551
    scholl-smt08 RNDPRE/RNDPRE_3_30.smt2 2.734287667 3.432511614
    scholl-smt08 RNDPRE/RNDPRE_3_34.smt2 17.868906689 22.746956995
    scholl-smt08 RNDPRE/RNDPRE_3_35.smt2 19.881819194 938.411815458
    scholl-smt08 RNDPRE/RNDPRE_3_37.smt2 7.325950314 325.586238466
    scholl-smt08 RNDPRE/RNDPRE_3_38.smt2 16.034573308 17.046350609
    scholl-smt08 RNDPRE/RNDPRE_3_40.smt2 6.129053044 7.81847456
    scholl-smt08 RNDPRE/RNDPRE_3_43.smt2 10.542567208 13.37075801
    scholl-smt08 RNDPRE/RNDPRE_3_48.smt2 4.524957574 5.421733311
    scholl-smt08 RNDPRE/RNDPRE_3_5.smt2 7.071562511 41.440126171
    scholl-smt08 RNDPRE/RNDPRE_3_50.smt2 12.021425061 14.128029887
    scholl-smt08 RNDPRE/RNDPRE_3_54.smt2 16.11967485 20.387409729
    scholl-smt08 RNDPRE/RNDPRE_3_60.smt2 36.716722002 47.183605343
    scholl-smt08 RNDPRE/RNDPRE_4_1.smt2 211.368804631 255.553955107
    scholl-smt08 RNDPRE/RNDPRE_4_10.smt2 7.753270781 10.70993155
    scholl-smt08 RNDPRE/RNDPRE_4_13.smt2 138.382260703 169.350536309
    scholl-smt08 RNDPRE/RNDPRE_4_20.smt2 5.817725783 4756.116208186
    scholl-smt08 RNDPRE/RNDPRE_4_22.smt2 79.681774593 818.135048049
    scholl-smt08 RNDPRE/RNDPRE_4_23.smt2 61.594335349 63.358403247
    scholl-smt08 RNDPRE/RNDPRE_4_25.smt2 6.842101137 8.586819705
    scholl-smt08 RNDPRE/RNDPRE_4_27.smt2 7.144846459 319.995529854
    scholl-smt08 RNDPRE/RNDPRE_4_29.smt2 11.442101052 486.974968373
    scholl-smt08 RNDPRE/RNDPRE_4_3.smt2 30.108419838 42.660076666
    scholl-smt08 RNDPRE/RNDPRE_4_30.smt2 733.131736181 1761.760700473
    scholl-smt08 RNDPRE/RNDPRE_4_37.smt2 8.635494177 190.045851998
    scholl-smt08 RNDPRE/RNDPRE_4_43.smt2 19.220412922 224.856578689
    scholl-smt08 RNDPRE/RNDPRE_4_50.smt2 45.514374379 263.633406911
    scholl-smt08 RNDPRE/RNDPRE_4_6.smt2 287.89092773 348.77595503
    scholl-smt08 model/model_5_33.smt2 0.707994297 1.356916268
    scholl-smt08 model/model_5_34.smt2 1.23627363 1.810174616
    scholl-smt08 model/model_5_35.smt2 1.531462032 2.168357717
    scholl-smt08 model/model_5_36.smt2 1.869723165 3.58982786
    scholl-smt08 model/model_5_37.smt2 1.132253005 1.844461486
    scholl-smt08 model/model_5_38.smt2 1.404746752 2.126453859
    scholl-smt08 model/model_5_39.smt2 1.474160876 3.552230534
    scholl-smt08 model/model_5_45.smt2 0.837594013 1.643754266
  4. Heizmann
    getFaster('heizmann', 'q3bOA', 'q3b')
    
    family benchmark q3bOA.cputime q3b.cputime
    nil gcd_2_true-unreach-call_true-no-overflow.i_925.smt2 12.024143788 1180.25749113
    nil gcd_3_true-unreach-call_true-no-overflow.i_1105.smt2 171.067154581 172.472060124
    nil jain_6_true-unreach-call_true-no-overflow.i_236.smt2 21.501500651 26.055018963
    nil verisec_sendmail__tTflag_arr_one_loop_false-unreach-call.i_1353.smt2 1.274247397 2.161401277
    nil verisec_sendmail__tTflag_arr_one_loop_false-unreach-call.i_2400.smt2 6.65083382 10.465717258
    nil verisec_sendmail__tTflag_arr_one_loop_false-unreach-call.i_2406.smt2 6.682220397 37.14365104

5 Plots

5.1 Scatter plots

5.1.1 Helper functions

We again define a function, which can generate a scatter plot for a given benchmark set and two configurations.

scatterPlot <- function(benchmarkSet, c1, c2)
{
return (ggplot(res[[benchmarkSet]],
      aes(res[[benchmarkSet]][[paste(c1, 'cputime', sep='.')]], res[[benchmarkSet]][[paste(c2, 'cputime', sep='.')]], colour=factor(q3b.result, c("timeout", "sat", "unsat", "out of memory"))), xlim=c(0.001,cpuTimeout), ylim=c(0.01,cpuTimeout)) +
      geom_point() +
      geom_abline(intercept=0) +
      geom_abline(intercept=1, color='gray') +
      geom_abline(intercept=-1, color='gray') +
      geom_abline(intercept=2, color='gray') +
      geom_abline(intercept=-2, color='gray') +
      xlab(c1) +
      ylab(c2) +
      scale_x_log10(limits = c(0.01, cpuTimeout), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
      scale_y_log10(limits = c(0.01, cpuTimeout), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
      guides(colour=FALSE) +
      theme(legend.title=element_blank()))

}

scatterPlotMem <- function(benchmarkSet, c1, c2)
{
return (ggplot(res[[benchmarkSet]],
      aes(res[[benchmarkSet]][[paste(c1, 'memusage', sep='.')]], res[[benchmarkSet]][[paste(c2, 'memusage', sep='.')]], colour=factor(q3b.result, c("timeout", "sat", "unsat", "out of memory"))), xlim=c(0.001,8000000000), ylim=c(0.01,8000000000)) +
      geom_point() +
      geom_abline(intercept=0) +
      xlab(c1) +
      ylab(c2) +
      scale_x_log10(limits = c(1000000, 16000000000), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
      scale_y_log10(limits = c(1000000, 16000000000), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
      guides(colour=FALSE) +
      theme(legend.title=element_blank()))

}

5.1.2 Wintersteiger

scatterPlot('wintersteiger', 'q3b', 'q3bOA')

wintersteiger.png

scatterPlotMem('wintersteiger', 'q3b', 'q3bOA')

wintersteigerMem.png

5.1.3 Preiner

scatterPlot('preiner', 'q3b', 'q3bOA')

preiner.png

scatterPlotMem('preiner', 'q3b', 'q3bOA')

preinerMem.png

5.1.4 Heizmann

scatterPlot('heizmann', 'q3b', 'q3bOA')

heizmann.png

scatterPlotMem('heizmann', 'q3b', 'q3bOA')

heizmannMem.png

5.2 Quantile plots

5.2.1 Helper functions

quantilePlot <- function(benchmarkSet, onlyTrivial = FALSE)
{
    num <- length(configurations)

    data <- res[[benchmarkSet]]

    if (onlyTrivial)
    {
        data <- filter(data, trivial == FALSE)
    }

    ordered = list()
    for (c in configurations)
    {
        ordered[[c]] = sort(data[[paste(c, 'cputime', sep='.')]][data[[paste(c, 'solved', sep='.')]]])
    }

    plot(c(0, nrow(data)), c(0.001, cpuTimeout), log='y', xlab='Solved formulas', ylab='CPU time (s)', frame.plot=TRUE, type='n', yaxt="n")
    axis(2, at = c(0.001, 0.01, 0.1, 1, 10, 100, 1000),
         labels = c(expression(paste("10"^"-3")),
                    expression(paste("10"^"-2")),
                    expression(paste("10"^"-1")),
                    "1",
                    "10",
                    expression(paste("10"^"2")),
                    expression(paste("10"^"3"))))

    colors <- c("blue", "darkgreen", "red", "black", "purple")
    ltys <- c(5,6,4,1,2)
    for (i in seq_along(configurations))
    {
        c <- configurations[i]
        lines(1:length(ordered[[c]]), ordered[[c]], type='s', col=colors[i], lty=ltys[i])
    }

    legend("topleft",
           lty=ltys,
           lwd=rep(2, each=num),
           col=colors,
           legend=labels)
}
quantilePlot('wintersteiger')

Sorry, your browser does not support SVG.

quantilePlot('preiner')

Sorry, your browser does not support SVG.

quantilePlot('heizmann')

Sorry, your browser does not support SVG.

quantilePlot('all')

Sorry, your browser does not support SVG.

5.3 Quantile plots of non-trivial benchmarks

This plot shows only result that are not trivial (i.e. some solver took more than 0.1 second to solve it)

This is the number of trivial benchmarks

nrow(filter(res$all, trivial))
3168

quantilePlot('all', TRUE)

Sorry, your browser does not support SVG.

quantilePlot('all', TRUE)

5.4 Unsolved benchmarks

plotUnsolved <- function(cs)
{
    cs <- rev(cs)
    unsolved = data.frame(source=character(),
                          configuration=character(),
                          stringsAsFactors=TRUE)
    for (c in cs)
    {
        cUnsolved <- res$all[["source"]][!res$all[[paste(c, 'solved', sep='.')]]]
        cUnsolved <- data.frame(
            source = cUnsolved,
            configuration = labels[c])
        unsolved <- rbind(unsolved, cUnsolved)
    }

    chart.data <- unsolved %>%
                    group_by(source, configuration) %>%
                    summarize(freq = n()) %>%
                    arrange(desc(source)) %>%
                    group_by(configuration) %>%
                    mutate(pos = cumsum(freq) - (0.5 * freq))

    ggplot(data = chart.data, aes(x = configuration, y = freq, fill = source)) +
        geom_bar(stat="identity") +
        coord_flip() +
        geom_text(data=chart.data, aes(x = configuration, y = pos, label = freq), size=3) +
        labs(y = "Number of unsolved benchmarks (less is better)", x = NULL, fill = "Benchmark set") +
        scale_fill_brewer(palette = "Set2")
}
plotUnsolved(c('boolector', 'cvc4', 'z3', 'q3b', 'q3bOA'))

Sorry, your browser does not support SVG.

Author: Martin Jonáš

Created: 2018-08-01 Wed 19:26