Approximation of Bit-Vector Operations for BDD-based SMT Solvers
Experimental evaluation
Table of Contents
1 Tools
In the evaluation, we used the following tools
1.1 Boolector
- in the version that entered SMT Competition 2017
using the following arguments from SMT Competition 2017
-uc --fun:preprop --prop:nprops:10000
1.2 CVC4
- in the version provided with the paper On Solving Quantified Bit-Vectors using Invertibility Conditions (CAV 2018)
using the following arguments
--no-cond-rewrite-quant --cbqi-bv-ineq=eq-boundary --global-negate --no-cbqi-innermost --ext-rew-prep
1.3 Z3
- in the stable version 4.6.0
- using no additional arguments
1.4 Q3B (vanilla)
- in the development version, GitHub release https://github.com/martinjonas/Q3B/releases/tag/ictac2018
using the following arguments
-A --approximation-method variables --propagate-unconstrained --check-models -r sift
1.5 Q3B (with operation approximations)
- in the same development version, GitHub release https://github.com/martinjonas/Q3B/releases/tag/ictac2018
using the following arguments
-A --approximation-method both --limit-bddsizes --propagate-unconstrained --check-models -r sift
2 CSV files
All statistics are computed from the BenchExec CSV file results.csv.
2.1 Convert to a correct CSV
First, we need to replace tabs by commas:
sed -i 's/\t/,/g' results.csv
And then we remove the standard BenchExec header rows and replace them by more readable ones.
cat results_header.csv > results_out.csv tail -n +4 results.csv >> results_out.csv
2.2 Split into benchmark sets
We split results according to the benchmark set and remove the temp file.
cat results_header.csv > data/wintersteiger.csv grep wintersteiger results_out.csv >> data/wintersteiger.csv sed -i 's#wintersteiger/fmsd13/##g' data/wintersteiger.csv sed -i 's#/#,##' data/wintersteiger.csv #separator is #, so we do not have to escape all backslashes cat results_header.csv > data/preiner.csv grep Preiner results_out.csv >> data/preiner.csv sed -i 's#2017-Preiner/##g' data/preiner.csv sed -i 's#/#,##' data/preiner.csv cat results_header.csv > data/heizmann.csv grep Heizmann results_out.csv >> data/heizmann.csv sed -i 's#20170501-Heizmann-UltimateAutomizer/#,#g' data/heizmann.csv rm results_out.csv
3 R environment
3.1 Load necessary libraries
library(dplyr) library(ggplot2) library(scales) library(colorspace) library(RColorBrewer) options("scipen"=100, "digits"=0)
3.2 Load all CSV files
res = list() res$wintersteiger <- read.csv("data/wintersteiger.csv", header=TRUE, stringsAsFactors=FALSE) res$preiner <- read.csv("data/preiner.csv", header=TRUE, stringsAsFactors=FALSE) res$heizmann <- read.csv("data/heizmann.csv", header=TRUE, stringsAsFactors=FALSE) res$wintersteiger[["source"]] <- "wintersteiger" res$preiner[["source"]] <- "preiner" res$heizmann[["source"]] <- "heizmann" configurations = c('boolector', 'cvc4', 'z3', 'q3b', 'q3bOA', 'q3bOAnoOpt') labels = c(boolector = 'Boolector', cvc4 = 'CVC4', z3 = 'Z3', q3b = 'Q3B', q3bOA = 'Q3B+OA', q3bOAnoOpt = 'Q3B+OA-opt')
We need to modify all times for results other than sat or unsat to have the maximal values of walltime and CPU-time.
cpuTimeout <- 5400 for (i in names(res)) { res[[i]][['trivial']] <- TRUE for (c in configurations) { res[[i]][[paste(c, 'solved', sep='.')]] <- res[[i]][[paste(c, 'result', sep='.')]] == "sat" | res[[i]][[paste(c, 'result', sep='.')]] == "unsat" res[[i]][[paste(c, 'cputime', sep='.')]][!res[[i]][[paste(c, 'solved', sep='.')]]] <- cpuTimeout res[[i]][[paste(c, 'walltime', sep='.')]][!res[[i]][[paste(c, 'solved', sep='.')]]] <- cpuTimeout/3 res[[i]][['trivial']] <- res[[i]][['trivial']] & res[[i]][[paste(c, 'cputime', sep='.')]] < 0.1 } } res$all <- rbind(res$wintersteiger, rbind(res$preiner, res$heizmann))
3.3 Check correctness of results
We need to check that no two solvers disagreed on the status of a benchmark (i.e. sat vs unsat). The result of the following command should be empty!
res$all %>% mutate(sat = ((boolector.result == "sat") | (cvc4.result == "sat") | (z3.result == "sat") | (q3b.result == "sat") | (q3bOA.result == "sat")), unsat = ((boolector.result == "unsat") | (cvc4.result == "unsat") | (z3.result == "unsat") | (q3b.result == "unsat") | (q3bOA.result == "unsat"))) %>% filter(sat & unsat) %>% select(benchmark, boolector.result, cvc4.result, z3.result, q3b.result, q3bOA.result)
benchmark | boolector.result | cvc4.result | z3.result | q3b.result | q3bOA.result |
4 Statistics
4.1 Numbers of solved formulas
4.1.1 Helper function to generate tables
getTable <- function(benchmarkSet) { counts <- list() for (c in configurations) { counts[[c]] <- count_(res[[benchmarkSet]], paste(c, 'result', sep='.')) colnames(counts[[c]]) <- c('result', c) } return(Reduce(function(...) merge(..., all=TRUE), counts)) }
4.1.2 Wintersteiger
getTable('wintersteiger')
result | boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt |
---|---|---|---|---|---|---|
OUT OF MEMORY | nil | nil | 7 | nil | 1 | 1 |
sat | 66 | 83 | 71 | 94 | 94 | 93 |
timeout | 31 | 19 | 20 | 3 | 2 | 3 |
unsat | 94 | 89 | 93 | 94 | 94 | 94 |
4.1.3 Preiner
getTable('preiner')
result | boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt |
---|---|---|---|---|---|---|
OUT OF MEMORY | nil | nil | 6 | 5 | 1 | nil |
sat | 519 | 453 | 475 | 507 | 524 | 524 |
timeout | 66 | 165 | 103 | 241 | 87 | 204 |
unsat | 4244 | 4211 | 4245 | 4076 | 4217 | 4101 |
4.1.4 Heizmann
getTable('heizmann')
result | boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt |
---|---|---|---|---|---|---|
OUT OF MEMORY | nil | nil | 3 | nil | nil | nil |
sat | 17 | 18 | 13 | 19 | 20 | 20 |
timeout | 100 | 6 | 94 | 19 | 17 | 17 |
unsat | 14 | 107 | 21 | 93 | 94 | 94 |
4.1.5 Total
getTable('all')
result | boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt |
---|---|---|---|---|---|---|
OUT OF MEMORY | nil | nil | 16 | 5 | 2 | 1 |
sat | 602 | 554 | 559 | 620 | 638 | 637 |
timeout | 197 | 190 | 217 | 263 | 106 | 224 |
unsat | 4352 | 4407 | 4359 | 4263 | 4405 | 4289 |
4.2 Solved formulas in benchmark families
res$all %>% group_by(source, family) %>% summarise(boolector = sum(boolector.solved), cvc4 = sum(cvc4.solved), z3 = sum(z3.solved), q3b = sum(q3b.solved), q3bOA = sum(q3bOA.solved), q3bOAnoOpt = sum(q3bOAnoOpt.solved),)
source | family | boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt |
---|---|---|---|---|---|---|---|
heizmann | nil | 31 | 125 | 34 | 112 | 114 | 114 |
preiner | keymaera | 4027 | 3997 | 4030 | 3890 | 4010 | 3894 |
preiner | psyco | 193 | 191 | 193 | 176 | 180 | 180 |
preiner | scholl-smt08 | 319 | 252 | 271 | 291 | 325 | 325 |
preiner | tptp | 72 | 73 | 73 | 73 | 73 | 73 |
preiner | UltimateAutomizer | 152 | 151 | 153 | 153 | 153 | 153 |
wintersteiger | fixpoint | 119 | 126 | 110 | 129 | 129 | 128 |
wintersteiger | ranking | 41 | 46 | 54 | 59 | 59 | 59 |
4.2.1 Only sat
res$all %>% filter(z3.result == 'sat' | boolector.result == 'sat' | cvc4.result == 'sat' | q3b.result == 'sat' | q3bOA.result == 'sat' ) %>% group_by(source, family) %>% summarise(boolector = sum(boolector.solved), cvc4 = sum(cvc4.solved), z3 = sum(z3.solved), q3b = sum(q3b.solved), q3bOA = sum(q3bOA.solved), q3bOAnoOpt = sum(q3bOAnoOpt.solved))
source | family | boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt |
---|---|---|---|---|---|---|---|
heizmann | nil | 17 | 18 | 13 | 19 | 20 | 20 |
preiner | keymaera | 108 | 78 | 108 | 104 | 104 | 104 |
preiner | psyco | 131 | 129 | 131 | 131 | 131 | 131 |
preiner | scholl-smt08 | 248 | 215 | 203 | 239 | 256 | 256 |
preiner | tptp | 17 | 17 | 17 | 17 | 17 | 17 |
preiner | UltimateAutomizer | 15 | 14 | 16 | 16 | 16 | 16 |
wintersteiger | fixpoint | 45 | 51 | 36 | 54 | 54 | 53 |
wintersteiger | ranking | 21 | 32 | 35 | 40 | 40 | 40 |
4.2.2 Only unsat
res$all %>% filter(z3.result == 'unsat' | boolector.result == 'unsat' | cvc4.result == 'unsat' | q3b.result == 'unsat' | q3bOA.result == 'unsat' ) %>% group_by(source, family) %>% summarise(boolector = sum(boolector.solved), cvc4 = sum(cvc4.solved), z3 = sum(z3.solved), q3b = sum(q3b.solved), q3bOA = sum(q3bOA.solved), q3bOAnoOpt = sum(q3bOAnoOpt.solved))
source | family | boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt |
---|---|---|---|---|---|---|---|
heizmann | nil | 14 | 107 | 21 | 93 | 94 | 94 |
preiner | keymaera | 3919 | 3919 | 3922 | 3786 | 3906 | 3790 |
preiner | psyco | 62 | 62 | 62 | 45 | 49 | 49 |
preiner | scholl-smt08 | 71 | 37 | 68 | 52 | 69 | 69 |
preiner | tptp | 55 | 56 | 56 | 56 | 56 | 56 |
preiner | UltimateAutomizer | 137 | 137 | 137 | 137 | 137 | 137 |
wintersteiger | fixpoint | 74 | 75 | 74 | 75 | 75 | 75 |
wintersteiger | ranking | 20 | 14 | 19 | 19 | 19 | 19 |
4.3 Uniquely solved benchmarks
4.3.1 Only by Z3
res$all %>% filter(z3.solved & !boolector.solved & !cvc4.solved & !q3b.solved & !q3bOA.solved) %>% select(source, family, benchmark) %>% group_by(source, family) %>% summarise(count = n())
source | family | count |
---|---|---|
preiner | keymaera | 1 |
preiner | scholl-smt08 | 2 |
4.3.2 Only by Boolector
res$all %>% filter(!z3.solved & boolector.solved & !cvc4.solved & !q3b.solved & !q3bOA.solved) %>% select(source, family, benchmark) %>% group_by(source, family) %>% summarise(count = n())
source | family | count |
---|---|---|
preiner | keymaera | 1 |
preiner | scholl-smt08 | 11 |
4.3.3 Only by CVC4
res$all %>% filter(!z3.solved & !boolector.solved & cvc4.solved & !q3b.solved & !q3bOA.solved) %>% select(source, family, benchmark) %>% group_by(source, family) %>% summarise(count = n())
source | family | count |
---|---|---|
heizmann | nil | 5 |
preiner | scholl-smt08 | 1 |
4.3.4 Only by Q3B with OpApprox
res$all %>% filter(!z3.solved & !boolector.solved & !cvc4.solved & !q3b.solved & q3bOA.solved) %>% select(source, family, benchmark) %>% group_by(source, family) %>% summarise(count = n())
source | family | count |
---|---|---|
heizmann | nil | 1 |
preiner | scholl-smt08 | 3 |
4.3.5 Only by Q3B with OpApprox (excluding the original Q3B)
res$all %>% filter(!z3.solved & !boolector.solved & !cvc4.solved & q3bOA.solved) %>% select(source, family, benchmark) %>% group_by(source, family) %>% summarise(count = n())
source | family | count |
---|---|---|
heizmann | nil | 1 |
preiner | scholl-smt08 | 13 |
wintersteiger | fixpoint | 3 |
wintersteiger | ranking | 3 |
4.3.6 By none of the solvers
res$all %>% filter(!z3.solved & !boolector.solved & !cvc4.solved & !q3b.solved & !q3bOA.solved) %>% select(source, family, benchmark) %>% group_by(source, family) %>% summarise(count = n())
source | family | count |
---|---|---|
heizmann | nil | 2 |
preiner | keymaera | 4 |
preiner | psyco | 1 |
preiner | scholl-smt08 | 29 |
wintersteiger | fixpoint | 2 |
4.4 Cross comparison
4.4.1 Helper function to generate tables
First we need a function which for given configurations computes a number of benchmarks that the first configuration has solved, but the second one has not.
firstIsBetter <- function(benchmarkSet, c1, c2) { c1Solved <- res[[benchmarkSet]][[paste(c1, 'solved', sep='.')]] c2Solved <- res[[benchmarkSet]][[paste(c2, 'solved', sep='.')]] onlyC1Solved <- c1Solved & !(c2Solved) return(onlyC1Solved) } formulasFirstIsBetter <- function(benchmarkSet, c1, c2) { return(res[[benchmarkSet]]$benchmark[firstIsBetter(benchmarkSet, c1, c2)]) } compareConfigurations <- function(benchmarkSet, c1, c2) { return(sum(firstIsBetter(benchmarkSet, c1, c2))) }
We can use this function to generate the cross table.
crossTable <- function(benchmarkSet) { results <- c() for (c1 in configurations) { for (c2 in configurations) { results <- c(results, compareConfigurations(benchmarkSet, c1, c2)) } } results.table <- matrix(results, ncol=6,byrow=TRUE) colnames(results.table) <- configurations rownames(results.table) <- configurations out <- as.table(results.table) return(out) }
4.4.2 Wintersteiger
crossTable('wintersteiger')
boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt | |
---|---|---|---|---|---|---|
boolector | 0 | 8 | 11 | 1 | 1 | 1 |
cvc4 | 20 | 0 | 19 | 0 | 0 | 0 |
z3 | 15 | 11 | 0 | 1 | 1 | 1 |
q3b | 29 | 16 | 25 | 0 | 0 | 1 |
q3bOA | 29 | 16 | 25 | 0 | 0 | 1 |
q3bOAnoOpt | 28 | 15 | 24 | 0 | 0 | 0 |
4.4.3 Preiner
crossTable('preiner')
boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt | |
---|---|---|---|---|---|---|
boolector | 0 | 108 | 58 | 200 | 49 | 165 |
cvc4 | 9 | 0 | 31 | 172 | 36 | 151 |
z3 | 15 | 87 | 0 | 184 | 37 | 153 |
q3b | 20 | 91 | 47 | 0 | 0 | 0 |
q3bOA | 27 | 113 | 58 | 158 | 0 | 116 |
q3bOAnoOpt | 27 | 112 | 58 | 42 | 0 | 0 |
4.4.4 Heizmann
crossTable('heizmann')
boolector | cvc4 | z3 | q3b | q3bOA | q3bOAnoOpt | |
---|---|---|---|---|---|---|
boolector | 0 | 3 | 7 | 2 | 1 | 1 |
cvc4 | 97 | 0 | 94 | 14 | 14 | 14 |
z3 | 10 | 3 | 0 | 11 | 10 | 10 |
q3b | 83 | 1 | 89 | 0 | 1 | 1 |
q3bOA | 84 | 3 | 90 | 3 | 0 | 0 |
q3bOAnoOpt | 84 | 3 | 90 | 3 | 0 | 0 |
4.5 Formulas, where the result is different
4.5.1 Wintersteiger
formulasFirstIsBetter('wintersteiger', 'q3b', 'q3bOA')
x |
formulasFirstIsBetter('wintersteiger', 'q3bOA', 'q3b')
x |
4.5.2 Preiner
formulasFirstIsBetter('preiner', 'q3b', 'q3bOA')
x |
formulasFirstIsBetter('preiner', 'q3bOA', 'q3b')
x | |
---|---|
1 | Ecoli-chemotaxis-node7007.smt2 |
2 | Ecoli-chemotaxis-node7008.smt2 |
3 | Ecoli-chemotaxis-node8284.smt2 |
4 | Ecoli-chemotaxis-node8285.smt2 |
5 | binary_driver-2007-10-09-node10280.smt2 |
6 | binary_driver-2007-10-09-node11384.smt2 |
7 | binary_driver-2007-10-09-node12553.smt2 |
8 | binary_driver-2007-10-09-node13792.smt2 |
9 | binary_driver-2007-10-09-node14896.smt2 |
10 | binary_driver-2007-10-09-node8746.smt2 |
11 | dccs-example-node3897.smt2 |
12 | dccs-example-simple-node3459.smt2 |
13 | dccs-example-simple-node6247.smt2 |
14 | intersection-example-onelane.proof-node11126.smt2 |
15 | intersection-example-onelane.proof-node11136.smt2 |
16 | intersection-example-onelane.proof-node12481.smt2 |
17 | intersection-example-onelane.proof-node12491.smt2 |
18 | intersection-example-onelane.proof-node12962.smt2 |
19 | intersection-example-onelane.proof-node12972.smt2 |
20 | intersection-example-onelane.proof-node13822.smt2 |
21 | intersection-example-onelane.proof-node13832.smt2 |
22 | intersection-example-onelane.proof-node14158.smt2 |
23 | intersection-example-onelane.proof-node14168.smt2 |
24 | intersection-example-onelane.proof-node1469.smt2 |
25 | intersection-example-onelane.proof-node1479.smt2 |
26 | intersection-example-onelane.proof-node16095.smt2 |
27 | intersection-example-onelane.proof-node16105.smt2 |
28 | intersection-example-onelane.proof-node17872.smt2 |
29 | intersection-example-onelane.proof-node17882.smt2 |
30 | intersection-example-onelane.proof-node18278.smt2 |
31 | intersection-example-onelane.proof-node18288.smt2 |
32 | intersection-example-onelane.proof-node19598.smt2 |
33 | intersection-example-onelane.proof-node20269.smt2 |
34 | intersection-example-onelane.proof-node20279.smt2 |
35 | intersection-example-onelane.proof-node21152.smt2 |
36 | intersection-example-onelane.proof-node21162.smt2 |
37 | intersection-example-onelane.proof-node21488.smt2 |
38 | intersection-example-onelane.proof-node21498.smt2 |
39 | intersection-example-onelane.proof-node21762.smt2 |
40 | intersection-example-onelane.proof-node21772.smt2 |
41 | intersection-example-onelane.proof-node23307.smt2 |
42 | intersection-example-onelane.proof-node23317.smt2 |
43 | intersection-example-onelane.proof-node23782.smt2 |
44 | intersection-example-onelane.proof-node24127.smt2 |
45 | intersection-example-onelane.proof-node24137.smt2 |
46 | intersection-example-onelane.proof-node25171.smt2 |
47 | intersection-example-onelane.proof-node25181.smt2 |
48 | intersection-example-onelane.proof-node2524.smt2 |
49 | intersection-example-onelane.proof-node2534.smt2 |
50 | intersection-example-onelane.proof-node26897.smt2 |
51 | intersection-example-onelane.proof-node29803.smt2 |
52 | intersection-example-onelane.proof-node29813.smt2 |
53 | intersection-example-onelane.proof-node30209.smt2 |
54 | intersection-example-onelane.proof-node30219.smt2 |
55 | intersection-example-onelane.proof-node31529.smt2 |
56 | intersection-example-onelane.proof-node32239.smt2 |
57 | intersection-example-onelane.proof-node32249.smt2 |
58 | intersection-example-onelane.proof-node32497.smt2 |
59 | intersection-example-onelane.proof-node32507.smt2 |
60 | intersection-example-onelane.proof-node34026.smt2 |
61 | intersection-example-onelane.proof-node34036.smt2 |
62 | intersection-example-onelane.proof-node35453.smt2 |
63 | intersection-example-onelane.proof-node37008.smt2 |
64 | intersection-example-onelane.proof-node37018.smt2 |
65 | intersection-example-onelane.proof-node38360.smt2 |
66 | intersection-example-onelane.proof-node38370.smt2 |
67 | intersection-example-onelane.proof-node3855.smt2 |
68 | intersection-example-onelane.proof-node3865.smt2 |
69 | intersection-example-onelane.proof-node39715.smt2 |
70 | intersection-example-onelane.proof-node39725.smt2 |
71 | intersection-example-onelane.proof-node40196.smt2 |
72 | intersection-example-onelane.proof-node40206.smt2 |
73 | intersection-example-onelane.proof-node41056.smt2 |
74 | intersection-example-onelane.proof-node41066.smt2 |
75 | intersection-example-onelane.proof-node41392.smt2 |
76 | intersection-example-onelane.proof-node41402.smt2 |
77 | intersection-example-onelane.proof-node4261.smt2 |
78 | intersection-example-onelane.proof-node4271.smt2 |
79 | intersection-example-onelane.proof-node43329.smt2 |
80 | intersection-example-onelane.proof-node43339.smt2 |
81 | intersection-example-onelane.proof-node45106.smt2 |
82 | intersection-example-onelane.proof-node45116.smt2 |
83 | intersection-example-onelane.proof-node45512.smt2 |
84 | intersection-example-onelane.proof-node45522.smt2 |
85 | intersection-example-onelane.proof-node46832.smt2 |
86 | intersection-example-onelane.proof-node47503.smt2 |
87 | intersection-example-onelane.proof-node47513.smt2 |
88 | intersection-example-onelane.proof-node48386.smt2 |
89 | intersection-example-onelane.proof-node48396.smt2 |
90 | intersection-example-onelane.proof-node48722.smt2 |
91 | intersection-example-onelane.proof-node48732.smt2 |
92 | intersection-example-onelane.proof-node48996.smt2 |
93 | intersection-example-onelane.proof-node49006.smt2 |
94 | intersection-example-onelane.proof-node50541.smt2 |
95 | intersection-example-onelane.proof-node50551.smt2 |
96 | intersection-example-onelane.proof-node51016.smt2 |
97 | intersection-example-onelane.proof-node51361.smt2 |
98 | intersection-example-onelane.proof-node51371.smt2 |
99 | intersection-example-onelane.proof-node52405.smt2 |
100 | intersection-example-onelane.proof-node52415.smt2 |
101 | intersection-example-onelane.proof-node53595.smt2 |
102 | intersection-example-onelane.proof-node53901.smt2 |
103 | intersection-example-onelane.proof-node53911.smt2 |
104 | intersection-example-onelane.proof-node54078.smt2 |
105 | intersection-example-onelane.proof-node5581.smt2 |
106 | intersection-example-onelane.proof-node6556.smt2 |
107 | intersection-example-onelane.proof-node6566.smt2 |
108 | intersection-example-onelane.proof-node6814.smt2 |
109 | intersection-example-onelane.proof-node6824.smt2 |
110 | intersection-example-onelane.proof-node8099.smt2 |
111 | intersection-example-onelane.proof-node8109.smt2 |
112 | intersection-example-onelane.proof-node9774.smt2 |
113 | intersection-example-onelane.proof-node9784.smt2 |
114 | vsl.proof-node1351.smt2 |
115 | vsl.proof-node1722.smt2 |
116 | vsl.proof-node2202.smt2 |
117 | vsl.proof-node2735.smt2 |
118 | vsl.proof-node3106.smt2 |
119 | vsli-alert.proof-node2416.smt2 |
120 | vsli.proof-node2282.smt2 |
121 | 016.smt2 |
122 | 060.smt2 |
123 | 183.smt2 |
124 | 184.smt2 |
125 | RND/RND_3_12.smt2 |
126 | RND/RND_3_19.smt2 |
127 | RND/RND_3_25.smt2 |
128 | RND/RND_4_19.smt2 |
129 | RND/RND_4_21.smt2 |
130 | RND/RND_4_22.smt2 |
131 | RND/RND_4_30.smt2 |
132 | RND/RND_6_10.smt2 |
133 | RND/RND_6_12.smt2 |
134 | RND/RND_6_13.smt2 |
135 | RND/RND_6_14.smt2 |
136 | RND/RND_6_16.smt2 |
137 | RND/RND_6_17.smt2 |
138 | RND/RND_6_19.smt2 |
139 | RND/RND_6_20.smt2 |
140 | RND/RND_6_21.smt2 |
141 | RND/RND_6_23.smt2 |
142 | RND/RND_6_24.smt2 |
143 | RND/RND_6_27.smt2 |
144 | RND/RND_6_28.smt2 |
145 | RND/RND_6_29.smt2 |
146 | RND/RND_6_30.smt2 |
147 | RND/RND_6_31.smt2 |
148 | RND/RND_6_33.smt2 |
149 | RND/RND_6_35.smt2 |
150 | RND/RND_6_36.smt2 |
151 | RND/RND_6_37.smt2 |
152 | RND/RND_6_8.smt2 |
153 | RNDPRE/RNDPRE_4_24.smt2 |
154 | RNDPRE/RNDPRE_4_32.smt2 |
155 | RNDPRE/RNDPRE_4_35.smt2 |
156 | RNDPRE/RNDPRE_4_55.smt2 |
157 | RNDPRE/RNDPRE_4_64.smt2 |
158 | RNDPRE/RNDPRE_4_65.smt2 |
4.5.3 Heizmann
formulasFirstIsBetter('heizmann', 'q3b', 'q3bOA')
x | |
---|---|
1 | sum02_false-unreach-call_true-no-overflow.i_336.smt2 |
formulasFirstIsBetter('heizmann', 'q3bOA', 'q3b')
x | |
---|---|
1 | sum02_false-unreach-call_true-no-overflow.c_415.smt2 |
2 | sum02_true-unreach-call_true-no-overflow.i_375.smt2 |
3 | sum02_true-unreach-call_true-no-overflow.i_416.smt2 |
4.6 Differences in time
4.6.1 Helper functions
We define a function, which compares two configurations and returns benchmarks that were faster in the first configuration by at least 0.5 s.
getFaster <- function(benchmarkSet, c1, c2) { firstIsBetter <- (res[[benchmarkSet]][[paste(c1, 'cputime', sep='.')]] + 0.5 < res[[benchmarkSet]][[paste(c2, 'cputime', sep='.')]]) & (res[[benchmarkSet]][[paste(c2, 'solved', sep='.')]]) return(res[[benchmarkSet]][firstIsBetter, c('family', 'benchmark', paste(c1, 'cputime', sep='.'), paste(c2, 'cputime', sep='.'))]) } getResourceConsumption <- function(benchmarkSet) { d <- res[[benchmarkSet]] d[which((d$q3b.result == "sat" & d$q3bOA.result == "sat") | (d$q3b.result == "unsat" & d$q3bOA.result == "unsat")),] %>% summarise(q3bCPU = sum(q3b.cputime), q3bMEM = sum(q3b.memusage), q3bOACPU = sum(q3bOA.cputime), q3bOAMEM = sum(q3bOA.memusage)) }
4.6.2 Total time/memory taken
getResourceConsumption('wintersteiger')
q3bCPU | q3bMEM | q3bOACPU | q3bOAMEM |
---|---|---|---|
10271.914595016 | 35347300352 | 14527.289808636 | 47733235712 |
getResourceConsumption('preiner')
q3bCPU | q3bMEM | q3bOACPU | q3bOAMEM |
---|---|---|---|
56547.469152472 | 185058263040 | 36869.809526722 | 155845062656 |
getResourceConsumption('heizmann')
q3bCPU | q3bMEM | q3bOACPU | q3bOAMEM |
---|---|---|---|
1829.125165153 | 17732628480 | 771.060594065 | 18478931968 |
4.6.3 Degraded performance
These are the results where the approximations increase the solving time by more than 0.5 s.
- Total count
nrow(getFaster('all', 'q3b', 'q3bOA'))
x 115 - Wintersteiger
getFaster('wintersteiger', 'q3b', 'q3bOA')
family benchmark q3b.cputime q3bOA.cputime ranking 1394diag_ioctl.c.smt2 671.587307652 1417.905386247 ranking 1394diag_isochapi.c.smt2 107.42415153 145.690326377 ranking AVStream_hwsim.cpp.smt2 1.177106004 2.45390292 ranking audio_ac97_common.cpp.smt2 107.182787838 146.083092278 ranking audio_ac97_rtstream.cpp.smt2 107.514638853 146.758227597 ranking audio_ac97_wavepcistream.cpp.smt2 1470.802759781 3322.724922215 ranking audio_ddksynth_voice.cpp.smt2 0.467101721 5.540378279 ranking audio_dmusuart_mpu.cpp.smt2 107.488648664 146.569389932 ranking audio_gfxswap.xp_filter.cpp.smt2 0.851135804 1.558473792 ranking filesys_cdfs_allocsup.c.smt2 132.626686447 161.650561263 ranking filesys_fastfat_cachesup.c.smt2 107.181146596 145.880798036 ranking filesys_fastfat_easup.c.smt2 109.305597157 145.953210713 ranking general_pcidrv_sys_hw_eeprom.c.smt2 0.173784249 3.002112054 ranking hid_firefly_app_firefly.cpp.smt2 48.802417474 134.542711733 ranking hid_hclient_ecdisp.c.smt2 113.192036922 150.542453366 ranking input_pnpi8042_moudep.c.smt2 108.212822482 156.439056916 ranking kernel_uagp35_gart.c.smt2 107.280160455 155.555740697 ranking kmdf_osrusbfx2_exe_dump.c.smt2 0.640004817 8.035077041 ranking mmedia_gsm610_gsm610.c.smt2 113.079313483 157.687509459 ranking mmedia_imaadpcm_imaadpcm.c.smt2 14.977591171 23.512965943 ranking network_irda_miniport_nscirda_comm.c.smt2 0.63599908 6.175644587 ranking network_ndis_coisdn_TpiParam.c.smt2 111.981431636 156.908094151 ranking network_ndis_rtlnwifi_extsta_st_aplst.c.smt2 107.853437712 154.784069571 ranking network_ndis_rtlnwifi_extsta_st_misc.c.smt2 523.67354059 1562.146911173 ranking network_usbnwifi_mp_util.c.smt2 2.798840558 17.405900201 - Preiner
getFaster('preiner', 'q3b', 'q3bOA')
family benchmark q3b.cputime q3bOA.cputime keymaera ETCS-essentials-node3023.smt2 0.761381522 1.453021719 keymaera binary_driver-2007-10-09-node12552.smt2 3.074012997 4.915266169 keymaera binary_driver-2007-10-09-node8745.smt2 5.05642805 7.892995259 keymaera bouncing-ball-simple-node5920.smt2 2.702275935 7.898430651 keymaera breaking-node2128.smt2 0.797799935 1.503937369 keymaera intersection-example-onelane.proof-node12517.smt2 1.055366416 1.884650801 keymaera intersection-example-onelane.proof-node31286.smt2 0.852529758 1.500007313 keymaera intersection-example-onelane.proof-node39751.smt2 1.056686747 1.895434925 keymaera intersection-example-onelane.proof-node5338.smt2 0.843131359 1.492254846 keymaera safety-lemma-node13046.smt2 1.664440236 3.297497288 keymaera train_goal6-node8607.smt2 1.628834489 3.262541088 keymaera vsli-alert.proof-node2442.smt2 43.786972111 87.403758318 keymaera vsli.proof-node2308.smt2 49.174914674 106.276966507 psyco 005.smt2 1.180574684 18.092380629 psyco 006.smt2 1.109374699 17.884192677 psyco 036.smt2 330.28922678 333.358735238 psyco 037.smt2 324.149088401 329.421187357 psyco 079.smt2 723.919693039 736.108938808 psyco 080.smt2 685.342760807 707.018101909 psyco 083.smt2 676.556121597 726.305845114 psyco 084.smt2 713.627832156 720.153448149 psyco 115.smt2 2.113850465 3.73514407 psyco 116.smt2 2.12203046 32.312203297 psyco 138.smt2 157.29214281 162.47832241 psyco 143.smt2 0.339719648 0.903540082 psyco 153.smt2 0.710333882 1.777799322 psyco 154.smt2 0.686893367 2.016368237 psyco 159.smt2 1.373608934 8.626490044 psyco 170.smt2 2898.514730796 2927.011825948 psyco 172.smt2 3.407084539 6.737689669 psyco 173.smt2 3.395646074 40.100174375 scholl-smt08 RND/RND_3_13.smt2 160.775761375 164.006917485 scholl-smt08 RND/RND_3_2.smt2 211.921715074 370.427616215 scholl-smt08 RND/RND_3_23.smt2 11.440463161 17.779460724 scholl-smt08 RND/RND_3_26.smt2 38.51347549 64.442988046 scholl-smt08 RND/RND_3_3.smt2 81.823767675 121.484194662 scholl-smt08 RND/RND_3_4.smt2 40.988748903 57.68242831 scholl-smt08 RND/RND_3_6.smt2 4.447337029 5.056113461 scholl-smt08 RND/RND_4_1.smt2 1358.110785151 4596.355982227 scholl-smt08 RND/RND_4_10.smt2 23.57007602 31.78109152 scholl-smt08 RND/RND_4_17.smt2 16.781952943 20.902500698 scholl-smt08 RND/RND_4_5.smt2 455.790696526 609.616239804 scholl-smt08 RND/RND_4_7.smt2 87.283331007 89.21377268 scholl-smt08 RND/RND_6_1.smt2 260.370919081 344.469927938 scholl-smt08 RNDPRE/RNDPRE_3_1.smt2 2.8637689 3.506070649 scholl-smt08 RNDPRE/RNDPRE_3_11.smt2 22.353424387 33.376241487 scholl-smt08 RNDPRE/RNDPRE_3_12.smt2 80.83653691 94.851198525 scholl-smt08 RNDPRE/RNDPRE_3_14.smt2 2.048917488 2.716594188 scholl-smt08 RNDPRE/RNDPRE_3_18.smt2 23.409304934 35.633548173 scholl-smt08 RNDPRE/RNDPRE_3_20.smt2 4.077024344 4.797727861 scholl-smt08 RNDPRE/RNDPRE_3_21.smt2 80.763015609 93.066239118 scholl-smt08 RNDPRE/RNDPRE_3_22.smt2 1165.82234427 1338.829963454 scholl-smt08 RNDPRE/RNDPRE_3_24.smt2 10.191486414 11.635671919 scholl-smt08 RNDPRE/RNDPRE_3_31.smt2 63.863727399 68.420517213 scholl-smt08 RNDPRE/RNDPRE_3_39.smt2 57.550011084 60.737275742 scholl-smt08 RNDPRE/RNDPRE_3_41.smt2 56.980588568 62.139511867 scholl-smt08 RNDPRE/RNDPRE_3_44.smt2 48.294877044 67.466062472 scholl-smt08 RNDPRE/RNDPRE_3_45.smt2 2416.317217598 3767.803495442 scholl-smt08 RNDPRE/RNDPRE_3_49.smt2 36.455572791 43.328566235 scholl-smt08 RNDPRE/RNDPRE_3_51.smt2 185.475250077 215.754833315 scholl-smt08 RNDPRE/RNDPRE_3_53.smt2 93.477684338 116.191977579 scholl-smt08 RNDPRE/RNDPRE_3_59.smt2 42.526751455 49.431820069 scholl-smt08 RNDPRE/RNDPRE_3_6.smt2 2.511326697 3.655771737 scholl-smt08 RNDPRE/RNDPRE_4_33.smt2 263.254385614 281.938122092 scholl-smt08 RNDPRE/RNDPRE_4_38.smt2 340.824356162 743.455711418 scholl-smt08 RNDPRE/RNDPRE_4_4.smt2 795.058177052 1080.018525106 scholl-smt08 RNDPRE/RNDPRE_4_40.smt2 3053.529413943 3612.426750411 scholl-smt08 RNDPRE/RNDPRE_4_57.smt2 44.734779512 45.340850489 scholl-smt08 RNDPRE/RNDPRE_4_8.smt2 123.110106248 169.895047003 scholl-smt08 RNDPRE/RNDPRE_4_9.smt2 141.150958497 173.825892858 - Heizmann
getFaster('heizmann', 'q3b', 'q3bOA')
family benchmark q3b.cputime q3bOA.cputime nil gcd_2_true-unreach-call_true-no-overflow.i_914.smt2 3.114726554 4.101953446 nil gcd_2_true-unreach-call_true-no-overflow.i_921.smt2 3.116258075 4.055646007 nil jain_4_true-unreach-call_true-no-overflow.i_198.smt2 0.225999084 1.21812294 nil jain_4_true-unreach-call_true-no-overflow.i_200.smt2 0.25686148 1.253770278 nil jain_4_true-unreach-call_true-no-overflow.i_225.smt2 2.846460092 4.131623695 nil jain_4_true-unreach-call_true-no-overflow.i_228.smt2 0.237294655 1.35334369 nil jain_4_true-unreach-call_true-no-overflow.i_341.smt2 0.200606218 0.862380401 nil jain_6_true-unreach-call_true-no-overflow.i_198.smt2 0.254291489 1.508441629 nil jain_6_true-unreach-call_true-no-overflow.i_200.smt2 0.259098131 1.556182019 nil jain_6_true-unreach-call_true-no-overflow.i_225.smt2 2.931025827 4.986284754 nil jain_6_true-unreach-call_true-no-overflow.i_230.smt2 0.262546205 1.640234998 nil jain_6_true-unreach-call_true-no-overflow.i_339.smt2 0.225196145 1.23780286 nil jain_7_true-unreach-call_true-no-overflow.i_215.smt2 0.293578357 20.900286224 nil jain_7_true-unreach-call_true-no-overflow.i_217.smt2 0.301615327 20.983302616 nil jain_7_true-unreach-call_true-no-overflow.i_242.smt2 0.308568166 31.420537673 nil jain_7_true-unreach-call_true-no-overflow.i_245.smt2 0.283402205 20.645164208 nil jain_7_true-unreach-call_true-no-overflow.i_259.smt2 0.157248987 6.999356617 nil jain_7_true-unreach-call_true-no-overflow.i_262.smt2 0.302941701 20.872302902 nil jain_7_true-unreach-call_true-no-overflow.i_382.smt2 0.247529518 10.186497986 nil jain_7_true-unreach-call_true-no-overflow.i_61.smt2 0.16028079 6.93921993
4.6.4 Improved performance
These are the results where the approximations decrease the solving time by more than 0.5 s.
- Total count
nrow(getFaster('all', 'q3bOA', 'q3b'))
x 96 - Wintersteiger
getFaster('wintersteiger', 'q3bOA', 'q3b')
family benchmark q3bOA.cputime q3b.cputime fixpoint sdlx-fixpoint-7.smt2 636.680779933 645.128950166 fixpoint sdlx-fixpoint-8.smt2 5346.053308338 5381.851551904 - Preiner
getFaster('preiner', 'q3bOA', 'q3b')
family benchmark q3bOA.cputime q3b.cputime keymaera binary_driver-2007-10-09-node10279.smt2 2.487087945 3.151799603 keymaera binary_driver-2007-10-09-node13791.smt2 2.473819409 3.104345336 keymaera safety-lemma-node14050.smt2 1.607410994 5.306416397 keymaera safety-lemma-node9082.smt2 17.351520903 20.241532654 keymaera train_goal1-node5682.smt2 1.600008342 5.295491251 keymaera train_goal3-node5717.smt2 17.877822059 21.210864749 psyco 035.smt2 324.106964028 377.346064139 psyco 059.smt2 1.156345407 2436.124622957 psyco 078.smt2 697.445739388 700.323267796 psyco 081.smt2 729.281808289 730.011129883 psyco 082.smt2 695.503608239 731.548550458 psyco 085.smt2 714.349296492 726.842541605 psyco 086.smt2 716.430426521 727.375572516 psyco 087.smt2 714.251385993 726.47276434 psyco 088.smt2 701.917221941 731.765957719 psyco 124.smt2 1.395648821 2.377621553 psyco 155.smt2 0.45303503 2.529845927 psyco 168.smt2 1171.482072847 1173.113759855 psyco 195.smt2 100.771228139 101.59982281 scholl-smt08 RND/RND_3_1.smt2 1.889658294 5.811979496 scholl-smt08 RND/RND_3_11.smt2 5.429106369 349.341929162 scholl-smt08 RND/RND_3_14.smt2 17.897407595 623.609974835 scholl-smt08 RND/RND_3_15.smt2 0.770786603 6.461161507 scholl-smt08 RND/RND_3_16.smt2 4.545652793 37.449699345 scholl-smt08 RND/RND_3_17.smt2 11.241660131 14.752083302 scholl-smt08 RND/RND_3_21.smt2 10.772769046 16.427674641 scholl-smt08 RND/RND_3_22.smt2 85.696575773 3215.819319923 scholl-smt08 RND/RND_3_27.smt2 5.506705 7.966480083 scholl-smt08 RND/RND_3_29.smt2 13.103730901 138.46411448 scholl-smt08 RND/RND_3_9.smt2 0.94652083 8.436300399 scholl-smt08 RND/RND_4_11.smt2 8.563307467 84.37104354 scholl-smt08 RND/RND_4_12.smt2 9.53956596 4235.859544771 scholl-smt08 RND/RND_4_15.smt2 30.134178802 32.983549076 scholl-smt08 RND/RND_4_23.smt2 31.566834216 287.756033364 scholl-smt08 RND/RND_4_24.smt2 190.757992528 1322.358761372 scholl-smt08 RND/RND_4_25.smt2 167.120237002 333.547240883 scholl-smt08 RND/RND_4_28.smt2 192.878609488 216.088515036 scholl-smt08 RND/RND_4_29.smt2 35.178766725 1790.508208779 scholl-smt08 RND/RND_4_4.smt2 1045.328163469 1057.006101041 scholl-smt08 RND/RND_4_6.smt2 2.810378875 18.603396187 scholl-smt08 RND/RND_6_18.smt2 342.399324654 1782.288360113 scholl-smt08 RND/RND_6_3.smt2 0.649935062 1.675127776 scholl-smt08 RND/RND_6_4.smt2 1.133371754 62.324435998 scholl-smt08 RND/RND_6_5.smt2 5.200997536 22.330153093 scholl-smt08 RND/RND_6_6.smt2 5.270796785 1121.119919961 scholl-smt08 RND/RND_6_7.smt2 4.998338076 176.530199489 scholl-smt08 RND/RND_6_9.smt2 1.769435082 17.49054758 scholl-smt08 RNDPRE/RNDPRE_3_15.smt2 14.666402256 15.391217505 scholl-smt08 RNDPRE/RNDPRE_3_16.smt2 14.206145704 15.756987725 scholl-smt08 RNDPRE/RNDPRE_3_17.smt2 5.23517539 33.649540902 scholl-smt08 RNDPRE/RNDPRE_3_26.smt2 2.588325318 35.636075564 scholl-smt08 RNDPRE/RNDPRE_3_28.smt2 4.072746926 5.417138591 scholl-smt08 RNDPRE/RNDPRE_3_29.smt2 3.874189601 4.671661551 scholl-smt08 RNDPRE/RNDPRE_3_30.smt2 2.734287667 3.432511614 scholl-smt08 RNDPRE/RNDPRE_3_34.smt2 17.868906689 22.746956995 scholl-smt08 RNDPRE/RNDPRE_3_35.smt2 19.881819194 938.411815458 scholl-smt08 RNDPRE/RNDPRE_3_37.smt2 7.325950314 325.586238466 scholl-smt08 RNDPRE/RNDPRE_3_38.smt2 16.034573308 17.046350609 scholl-smt08 RNDPRE/RNDPRE_3_40.smt2 6.129053044 7.81847456 scholl-smt08 RNDPRE/RNDPRE_3_43.smt2 10.542567208 13.37075801 scholl-smt08 RNDPRE/RNDPRE_3_48.smt2 4.524957574 5.421733311 scholl-smt08 RNDPRE/RNDPRE_3_5.smt2 7.071562511 41.440126171 scholl-smt08 RNDPRE/RNDPRE_3_50.smt2 12.021425061 14.128029887 scholl-smt08 RNDPRE/RNDPRE_3_54.smt2 16.11967485 20.387409729 scholl-smt08 RNDPRE/RNDPRE_3_60.smt2 36.716722002 47.183605343 scholl-smt08 RNDPRE/RNDPRE_4_1.smt2 211.368804631 255.553955107 scholl-smt08 RNDPRE/RNDPRE_4_10.smt2 7.753270781 10.70993155 scholl-smt08 RNDPRE/RNDPRE_4_13.smt2 138.382260703 169.350536309 scholl-smt08 RNDPRE/RNDPRE_4_20.smt2 5.817725783 4756.116208186 scholl-smt08 RNDPRE/RNDPRE_4_22.smt2 79.681774593 818.135048049 scholl-smt08 RNDPRE/RNDPRE_4_23.smt2 61.594335349 63.358403247 scholl-smt08 RNDPRE/RNDPRE_4_25.smt2 6.842101137 8.586819705 scholl-smt08 RNDPRE/RNDPRE_4_27.smt2 7.144846459 319.995529854 scholl-smt08 RNDPRE/RNDPRE_4_29.smt2 11.442101052 486.974968373 scholl-smt08 RNDPRE/RNDPRE_4_3.smt2 30.108419838 42.660076666 scholl-smt08 RNDPRE/RNDPRE_4_30.smt2 733.131736181 1761.760700473 scholl-smt08 RNDPRE/RNDPRE_4_37.smt2 8.635494177 190.045851998 scholl-smt08 RNDPRE/RNDPRE_4_43.smt2 19.220412922 224.856578689 scholl-smt08 RNDPRE/RNDPRE_4_50.smt2 45.514374379 263.633406911 scholl-smt08 RNDPRE/RNDPRE_4_6.smt2 287.89092773 348.77595503 scholl-smt08 model/model_5_33.smt2 0.707994297 1.356916268 scholl-smt08 model/model_5_34.smt2 1.23627363 1.810174616 scholl-smt08 model/model_5_35.smt2 1.531462032 2.168357717 scholl-smt08 model/model_5_36.smt2 1.869723165 3.58982786 scholl-smt08 model/model_5_37.smt2 1.132253005 1.844461486 scholl-smt08 model/model_5_38.smt2 1.404746752 2.126453859 scholl-smt08 model/model_5_39.smt2 1.474160876 3.552230534 scholl-smt08 model/model_5_45.smt2 0.837594013 1.643754266 - Heizmann
getFaster('heizmann', 'q3bOA', 'q3b')
family benchmark q3bOA.cputime q3b.cputime nil gcd_2_true-unreach-call_true-no-overflow.i_925.smt2 12.024143788 1180.25749113 nil gcd_3_true-unreach-call_true-no-overflow.i_1105.smt2 171.067154581 172.472060124 nil jain_6_true-unreach-call_true-no-overflow.i_236.smt2 21.501500651 26.055018963 nil verisec_sendmail__tTflag_arr_one_loop_false-unreach-call.i_1353.smt2 1.274247397 2.161401277 nil verisec_sendmail__tTflag_arr_one_loop_false-unreach-call.i_2400.smt2 6.65083382 10.465717258 nil verisec_sendmail__tTflag_arr_one_loop_false-unreach-call.i_2406.smt2 6.682220397 37.14365104
5 Plots
5.1 Scatter plots
5.1.1 Helper functions
We again define a function, which can generate a scatter plot for a given benchmark set and two configurations.
scatterPlot <- function(benchmarkSet, c1, c2) { return (ggplot(res[[benchmarkSet]], aes(res[[benchmarkSet]][[paste(c1, 'cputime', sep='.')]], res[[benchmarkSet]][[paste(c2, 'cputime', sep='.')]], colour=factor(q3b.result, c("timeout", "sat", "unsat", "out of memory"))), xlim=c(0.001,cpuTimeout), ylim=c(0.01,cpuTimeout)) + geom_point() + geom_abline(intercept=0) + geom_abline(intercept=1, color='gray') + geom_abline(intercept=-1, color='gray') + geom_abline(intercept=2, color='gray') + geom_abline(intercept=-2, color='gray') + xlab(c1) + ylab(c2) + scale_x_log10(limits = c(0.01, cpuTimeout), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + scale_y_log10(limits = c(0.01, cpuTimeout), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + guides(colour=FALSE) + theme(legend.title=element_blank())) } scatterPlotMem <- function(benchmarkSet, c1, c2) { return (ggplot(res[[benchmarkSet]], aes(res[[benchmarkSet]][[paste(c1, 'memusage', sep='.')]], res[[benchmarkSet]][[paste(c2, 'memusage', sep='.')]], colour=factor(q3b.result, c("timeout", "sat", "unsat", "out of memory"))), xlim=c(0.001,8000000000), ylim=c(0.01,8000000000)) + geom_point() + geom_abline(intercept=0) + xlab(c1) + ylab(c2) + scale_x_log10(limits = c(1000000, 16000000000), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + scale_y_log10(limits = c(1000000, 16000000000), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + guides(colour=FALSE) + theme(legend.title=element_blank())) }
5.1.2 Wintersteiger
scatterPlot('wintersteiger', 'q3b', 'q3bOA')
scatterPlotMem('wintersteiger', 'q3b', 'q3bOA')
5.1.3 Preiner
scatterPlot('preiner', 'q3b', 'q3bOA')
scatterPlotMem('preiner', 'q3b', 'q3bOA')
5.1.4 Heizmann
scatterPlot('heizmann', 'q3b', 'q3bOA')
scatterPlotMem('heizmann', 'q3b', 'q3bOA')
5.2 Quantile plots
5.2.1 Helper functions
quantilePlot <- function(benchmarkSet, onlyTrivial = FALSE) { num <- length(configurations) data <- res[[benchmarkSet]] if (onlyTrivial) { data <- filter(data, trivial == FALSE) } ordered = list() for (c in configurations) { ordered[[c]] = sort(data[[paste(c, 'cputime', sep='.')]][data[[paste(c, 'solved', sep='.')]]]) } plot(c(0, nrow(data)), c(0.001, cpuTimeout), log='y', xlab='Solved formulas', ylab='CPU time (s)', frame.plot=TRUE, type='n', yaxt="n") axis(2, at = c(0.001, 0.01, 0.1, 1, 10, 100, 1000), labels = c(expression(paste("10"^"-3")), expression(paste("10"^"-2")), expression(paste("10"^"-1")), "1", "10", expression(paste("10"^"2")), expression(paste("10"^"3")))) colors <- c("blue", "darkgreen", "red", "black", "purple") ltys <- c(5,6,4,1,2) for (i in seq_along(configurations)) { c <- configurations[i] lines(1:length(ordered[[c]]), ordered[[c]], type='s', col=colors[i], lty=ltys[i]) } legend("topleft", lty=ltys, lwd=rep(2, each=num), col=colors, legend=labels) }
quantilePlot('wintersteiger')
quantilePlot('preiner')
quantilePlot('heizmann')
quantilePlot('all')
5.3 Quantile plots of non-trivial benchmarks
This plot shows only result that are not trivial (i.e. some solver took more than 0.1 second to solve it)
This is the number of trivial benchmarks
nrow(filter(res$all, trivial))
3168
quantilePlot('all', TRUE)
quantilePlot('all', TRUE)
5.4 Unsolved benchmarks
plotUnsolved <- function(cs) { cs <- rev(cs) unsolved = data.frame(source=character(), configuration=character(), stringsAsFactors=TRUE) for (c in cs) { cUnsolved <- res$all[["source"]][!res$all[[paste(c, 'solved', sep='.')]]] cUnsolved <- data.frame( source = cUnsolved, configuration = labels[c]) unsolved <- rbind(unsolved, cUnsolved) } chart.data <- unsolved %>% group_by(source, configuration) %>% summarize(freq = n()) %>% arrange(desc(source)) %>% group_by(configuration) %>% mutate(pos = cumsum(freq) - (0.5 * freq)) ggplot(data = chart.data, aes(x = configuration, y = freq, fill = source)) + geom_bar(stat="identity") + coord_flip() + geom_text(data=chart.data, aes(x = configuration, y = pos, label = freq), size=3) + labs(y = "Number of unsolved benchmarks (less is better)", x = NULL, fill = "Benchmark set") + scale_fill_brewer(palette = "Set2") }
plotUnsolved(c('boolector', 'cvc4', 'z3', 'q3b', 'q3bOA'))