[doc] Fix HowToManuallyUseTheIndividualPiecesOfPolly

Also remove compiled binaries.

llvm-svn: 343119
This commit is contained in:
Michael Kruse 2018-09-26 15:22:39 +00:00
parent fe7bd34b79
commit 3b4d331d8c
39 changed files with 2702 additions and 2110 deletions

View File

@ -21,7 +21,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
clang -S -emit-llvm matmul.c -o matmul.s
clang -S -emit-llvm matmul.c -Xclang -disable-O0-optnone -o matmul.ll
2. **Prepare the LLVM-IR for Polly**
@ -34,7 +34,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
opt -S -polly-canonicalize matmul.s > matmul.preopt.ll
opt -S -polly-canonicalize matmul.ll -o matmul.preopt.ll
3. **Show the SCoPs detected by Polly (optional)**
--------------------------------------------------
@ -45,7 +45,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt -polly-ast -analyze -q matmul.preopt.ll -polly-process-unprofitable
$ opt -basicaa -polly-ast -analyze matmul.preopt.ll -polly-process-unprofitable -polly-use-llvm-names
.. code-block:: guess
@ -84,8 +84,8 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt -view-scops -disable-output matmul.preopt.ll
$ opt -view-scops-only -disable-output matmul.preopt.ll
$ opt -polly-use-llvm-names -basicaa -view-scops -disable-output matmul.preopt.ll
$ opt -polly-use-llvm-names -basicaa -view-scops-only -disable-output matmul.preopt.ll
The output for the different functions:
@ -104,7 +104,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
$ opt -polly-use-llvm-names -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
.. code-block:: guess
@ -194,7 +194,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt -polly-dependences -analyze matmul.preopt.ll -polly-process-unprofitable
$ opt -basicaa -polly-use-llvm-names -polly-dependences -analyze matmul.preopt.ll -polly-process-unprofitable
.. code-block:: guess
@ -226,7 +226,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
$ opt -basicaa -polly-use-llvm-names -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
.. code-block:: guess
@ -254,7 +254,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-ast -analyze -polly-process-unprofitable
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@ -282,7 +282,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-ast -analyze -polly-process-unprofitable
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@ -311,7 +311,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@ -346,7 +346,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@ -383,11 +383,11 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll
$ opt -S matmul.preopt.ll | opt -S -O3 -o matmul.normalopt.ll
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-codegen -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged.ll
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-codegen -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged.ll
.. code-block:: guess
@ -397,7 +397,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-codegen -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled.ll
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-codegen -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled.ll
.. code-block:: guess
@ -407,7 +407,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled+vector.ll
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled+vector.ll
.. code-block:: guess
@ -417,7 +417,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-parallel -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled+openmp.ll
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-parallel -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled+vector+openmp.ll
.. code-block:: guess
@ -431,11 +431,16 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
$ llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s -o matmul.normalopt.exe
$ llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s && gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
$ llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s && gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
$ llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s && gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
$ llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s && gcc -fopenmp matmul.polly.interchanged+tiled+vector+openmp.s -o matmul.polly.interchanged+tiled+vector+openmp.exe
$ llc matmul.normalopt.ll -o matmul.normalopt.s -relocation-model=pic
$ gcc matmul.normalopt.s -o matmul.normalopt.exe
$ llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s -relocation-model=pic
$ gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
$ llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s -relocation-model=pic
$ gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
$ llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s -relocation-model=pic
$ gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
$ llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s -relocation-model=pic
$ gcc matmul.polly.interchanged+tiled+vector+openmp.s -lgomp -o matmul.polly.interchanged+tiled+vector+openmp.exe
11. **Compare the runtime of the executables**
----------------------------------------------

View File

@ -1,33 +1,39 @@
{
"arrays" : [
"arrays": [
{
"name" : "MemRef_A",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_B",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context" : "{ : }",
"name" : "%for.cond1.preheader---%for.end19",
"statements" : [
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end19",
"statements": [
{
"accesses" : [
"accesses": [
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name" : "Stmt_for_body3",
"schedule" : "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
}
]
}
}

View File

@ -0,0 +1,39 @@
{
"arrays": [
{
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end19",
"statements": [
{
"accesses": [
{
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
},
{
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
}
],
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
}
]
}

View File

@ -0,0 +1,39 @@
{
"arrays": [
{
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end19",
"statements": [
{
"accesses": [
{
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
},
{
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
}
],
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
}
]
}

View File

@ -0,0 +1,39 @@
{
"arrays": [
{
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end19",
"statements": [
{
"accesses": [
{
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
},
{
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
}
],
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
}
]
}

View File

@ -1,57 +1,66 @@
{
"arrays" : [
"arrays": [
{
"name" : "MemRef_C",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_C",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_A",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_B",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context" : "{ : }",
"name" : "%for.cond1.preheader---%for.end30",
"statements" : [
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end30",
"statements": [
{
"accesses" : [
"accesses": [
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name" : "Stmt_for_body3",
"schedule" : "{ Stmt_for_body3[i0, i1] -> [i0, i1, 0, 0] }"
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1, 0, 0] }"
},
{
"accesses" : [
"accesses": [
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name" : "Stmt_for_body8",
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [i0, i1, 1, i2] }"
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name": "Stmt_for_body8",
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [i0, i1, 1, i2] }"
}
]
}
}

View File

@ -1,57 +1,66 @@
{
"arrays" : [
"arrays": [
{
"name" : "MemRef_C",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_C",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_A",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_B",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context" : "{ : }",
"name" : "%for.cond1.preheader---%for.end30",
"statements" : [
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end30",
"statements": [
{
"accesses" : [
"accesses": [
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name" : "Stmt_for_body3",
"schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0] }"
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0] }"
},
{
"accesses" : [
"accesses": [
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name" : "Stmt_for_body8",
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, i0, i2, i1] }"
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name": "Stmt_for_body8",
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, i0, i2, i1] }"
}
]
}
}

View File

@ -1,57 +1,66 @@
{
"arrays" : [
"arrays": [
{
"name" : "MemRef_C",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_C",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_A",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_B",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context" : "{ : }",
"name" : "%for.cond1.preheader---%for.end30",
"statements" : [
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end30",
"statements": [
{
"accesses" : [
"accesses": [
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name" : "Stmt_for_body3",
"schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0 ] }"
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0 ] }"
},
{
"accesses" : [
"accesses": [
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name" : "Stmt_for_body8",
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, i1]: o0 <= i0 < o0 + 64 and o1 <= i1 < o1 + 64 and o2 <= i2 < o2 + 64 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 }"
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name": "Stmt_for_body8",
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, i1]: o0 <= i0 < o0 + 64 and o1 <= i1 < o1 + 64 and o2 <= i2 < o2 + 64 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 }"
}
]
}
}

View File

@ -1,57 +1,66 @@
{
"arrays" : [
"arrays": [
{
"name" : "MemRef_C",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_C",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_A",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_A",
"sizes": [
"*",
"1536"
],
"type": "float"
},
{
"name" : "MemRef_B",
"sizes" : [ "1536" ],
"type" : "float"
"name": "MemRef_B",
"sizes": [
"*",
"1536"
],
"type": "float"
}
],
"context" : "{ : }",
"name" : "%for.cond1.preheader---%for.end30",
"statements" : [
"context": "{ : }",
"name": "%for.cond1.preheader---%for.end30",
"statements": [
{
"accesses" : [
"accesses": [
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name" : "Stmt_for_body3",
"schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0, 0 ] }"
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
"name": "Stmt_for_body3",
"schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0, 0 ] }"
},
{
"accesses" : [
"accesses": [
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
"kind": "write",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name" : "Stmt_for_body8",
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, oo1, i1]: o0 <= i0 < o0 + 64 and o1 <= oo1 < o1 + 64 and o2 <= i2 < o2 + 64 and oo1 <= i1 < oo1 + 4 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 and oo1 % 4 = 0 }"
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
"name": "Stmt_for_body8",
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, oo1, i1]: o0 <= i0 < o0 + 64 and o1 <= oo1 < o1 + 64 and o2 <= i2 < o2 + 64 and oo1 <= i1 < oo1 + 4 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 and oo1 % 4 = 0 }"
}
]
}
}

View File

@ -6,15 +6,15 @@ target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external global %struct._IO_FILE*, align 8
@A = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external dso_local global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
@C = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
; Function Attrs: nounwind uwtable
define void @init_array() #0 {
; Function Attrs: noinline nounwind uwtable
define dso_local void @init_array() #0 {
entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
@ -44,12 +44,12 @@ for.body3: ; preds = %for.cond1
%conv = sitofp i32 %add to double
%div = fdiv double %conv, 2.000000e+00
%conv4 = fptrunc double %div to float
%4 = load i32, i32* %j, align 4
%4 = load i32, i32* %i, align 4
%idxprom = sext i32 %4 to i64
%5 = load i32, i32* %i, align 4
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom
%5 = load i32, i32* %j, align 4
%idxprom5 = sext i32 %5 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom5
%arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
%arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom5
store float %conv4, float* %arrayidx6, align 4
%6 = load i32, i32* %i, align 4
%7 = load i32, i32* %j, align 4
@ -59,12 +59,12 @@ for.body3: ; preds = %for.cond1
%conv10 = sitofp i32 %add9 to double
%div11 = fdiv double %conv10, 2.000000e+00
%conv12 = fptrunc double %div11 to float
%8 = load i32, i32* %j, align 4
%8 = load i32, i32* %i, align 4
%idxprom13 = sext i32 %8 to i64
%9 = load i32, i32* %i, align 4
%idxprom14 = sext i32 %9 to i64
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom14
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
%arrayidx14 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom13
%9 = load i32, i32* %j, align 4
%idxprom15 = sext i32 %9 to i64
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx14, i64 0, i64 %idxprom15
store float %conv12, float* %arrayidx16, align 4
br label %for.inc
@ -87,8 +87,8 @@ for.end19: ; preds = %for.cond
ret void
}
; Function Attrs: nounwind uwtable
define void @print_array() #0 {
; Function Attrs: noinline nounwind uwtable
define dso_local void @print_array() #0 {
entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
@ -111,12 +111,12 @@ for.cond1: ; preds = %for.inc, %for.body
for.body3: ; preds = %for.cond1
%2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
%3 = load i32, i32* %j, align 4
%3 = load i32, i32* %i, align 4
%idxprom = sext i32 %3 to i64
%4 = load i32, i32* %i, align 4
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom
%4 = load i32, i32* %j, align 4
%idxprom4 = sext i32 %4 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom4
%5 = load float, float* %arrayidx5, align 4
%conv = fpext float %5 to double
%call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
@ -154,10 +154,10 @@ for.end12: ; preds = %for.cond
ret void
}
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
; Function Attrs: nounwind uwtable
define i32 @main() #0 {
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%i = alloca i32, align 4
@ -185,12 +185,12 @@ for.cond1: ; preds = %for.inc25, %for.bod
br i1 %cmp2, label %for.body3, label %for.end27
for.body3: ; preds = %for.cond1
%2 = load i32, i32* %j, align 4
%2 = load i32, i32* %i, align 4
%idxprom = sext i32 %2 to i64
%3 = load i32, i32* %i, align 4
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom
%3 = load i32, i32* %j, align 4
%idxprom4 = sext i32 %3 to i64
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom4
store float 0.000000e+00, float* %arrayidx5, align 4
store i32 0, i32* %k, align 4
br label %for.cond6
@ -201,35 +201,35 @@ for.cond6: ; preds = %for.inc, %for.body3
br i1 %cmp7, label %for.body8, label %for.end
for.body8: ; preds = %for.cond6
%5 = load i32, i32* %j, align 4
%5 = load i32, i32* %i, align 4
%idxprom9 = sext i32 %5 to i64
%6 = load i32, i32* %i, align 4
%idxprom10 = sext i32 %6 to i64
%arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom10
%arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i64 0, i64 %idxprom9
%arrayidx10 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom9
%6 = load i32, i32* %j, align 4
%idxprom11 = sext i32 %6 to i64
%arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx10, i64 0, i64 %idxprom11
%7 = load float, float* %arrayidx12, align 4
%8 = load i32, i32* %k, align 4
%8 = load i32, i32* %i, align 4
%idxprom13 = sext i32 %8 to i64
%9 = load i32, i32* %i, align 4
%idxprom14 = sext i32 %9 to i64
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom14
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
%arrayidx14 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom13
%9 = load i32, i32* %k, align 4
%idxprom15 = sext i32 %9 to i64
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx14, i64 0, i64 %idxprom15
%10 = load float, float* %arrayidx16, align 4
%11 = load i32, i32* %j, align 4
%11 = load i32, i32* %k, align 4
%idxprom17 = sext i32 %11 to i64
%12 = load i32, i32* %k, align 4
%idxprom18 = sext i32 %12 to i64
%arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom18
%arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i64 0, i64 %idxprom17
%arrayidx18 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom17
%12 = load i32, i32* %j, align 4
%idxprom19 = sext i32 %12 to i64
%arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx18, i64 0, i64 %idxprom19
%13 = load float, float* %arrayidx20, align 4
%mul = fmul float %10, %13
%add = fadd float %7, %mul
%14 = load i32, i32* %j, align 4
%14 = load i32, i32* %i, align 4
%idxprom21 = sext i32 %14 to i64
%15 = load i32, i32* %i, align 4
%idxprom22 = sext i32 %15 to i64
%arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom22
%arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i64 0, i64 %idxprom21
%arrayidx22 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom21
%15 = load i32, i32* %j, align 4
%idxprom23 = sext i32 %15 to i64
%arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx22, i64 0, i64 %idxprom23
store float %add, float* %arrayidx24, align 4
br label %for.inc
@ -261,9 +261,11 @@ for.end30: ; preds = %for.cond
ret i32 0
}
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}

View File

@ -1,263 +1,235 @@
.file "matmul.normalopt.ll"
.text
.file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
leaq B(%rip), %rax
leaq A(%rip), %rcx
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
xorl %r9d, %r9d
.p2align 4, 0x90
.LBB0_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
movl $1, %edi
xorl %edx, %edx
.p2align 4, 0x90
.LBB0_2: # %for.body3
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %r8d, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %r8, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
andl $1022, %esi # imm = 0x3FE
orl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, -4(%rcx,%rdi,4)
movss %xmm1, -4(%rax,%rdi,4)
leal (%r9,%rdx), %esi
andl $1023, %esi # imm = 0x3FF
addl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, (%rcx,%rdi,4)
movss %xmm1, (%rax,%rdi,4)
addq $2, %rdi
addl %r8d, %edx
cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
# BB#3: # %for.inc17
# %bb.3: # %for.inc17
# in Loop: Header=BB0_1 Depth=1
incq %r8
cmpq $1536, %r8 # imm = 0x600
addq $1, %r9
addq $6144, %rax # imm = 0x1800
addq $6144, %rcx # imm = 0x1800
addl $2, %r8d
cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
# BB#4: # %for.end19
# %bb.4: # %for.end19
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size init_array, .Lfunc_end0-init_array
.cfi_endproc
.globl print_array
.align 16, 0x90
# -- End function
.globl print_array # -- Begin function print_array
.p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
pushq %rax
.cfi_offset %rbx, -56
.cfi_offset %r12, -48
.cfi_offset %r13, -40
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
leaq C(%rip), %r13
xorl %eax, %eax
movl $3435973837, %r12d # imm = 0xCCCCCCCD
leaq .L.str(%rip), %r14
.p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
movq stdout(%rip), %rax
movq %r15, %r12
movq %rax, -48(%rbp) # 8-byte Spill
movq stdout(%rip), %rsi
xorl %ebx, %ebx
.align 16, 0x90
.p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
movq %rax, %rdi
movl $.L.str, %esi
movl %ebx, %eax
imulq %r12, %rax
shrq $38, %rax
leal (%rax,%rax,4), %r15d
shll $4, %r15d
addl $79, %r15d
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
cvtss2sd %xmm0, %xmm0
movb $1, %al
movq %rsi, %rdi
movq %r14, %rsi
callq fprintf
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
cmpl $79, %eax
cmpl %ebx, %r15d
jne .LBB1_4
# BB#3: # %if.then
# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
addq $1, %rbx
movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
callq fputc@PLT
movq -48(%rbp), %rax # 8-byte Reload
addq $1, %rax
addq $6144, %r13 # imm = 0x1800
cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
# %bb.6: # %for.end12
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size print_array, .Lfunc_end1-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
# -- End function
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp19:
.cfi_def_cfa_offset 16
.Ltmp20:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp21:
.cfi_def_cfa_register %rbp
xorl %r8d, %r8d
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %for.cond1.preheader.i
callq init_array
leaq A(%rip), %rax
xorl %r10d, %r10d
leaq B(%rip), %r8
leaq C(%rip), %r9
.p2align 4, 0x90
.LBB2_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %for.body3.i
# Child Loop BB2_3 Depth 3
movq %r8, %rsi
xorl %edx, %edx
.p2align 4, 0x90
.LBB2_2: # %for.body3
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %r8d, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %r8, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %for.inc17.i
# in Loop: Header=BB2_1 Depth=1
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB2_1
# BB#4:
xorl %r8d, %r8d
movl $A, %r9d
.align 16, 0x90
.LBB2_5: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB2_6 Depth 2
# Child Loop BB2_7 Depth 3
leaq (%r8,%r8,2), %rdx
shlq $11, %rdx
leaq C(%rdx), %rsi
xorl %edi, %edi
.align 16, 0x90
.LBB2_6: # %for.body3
# Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_7 Depth 3
movl $0, (%rsi)
vxorps %xmm0, %xmm0, %xmm0
movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000
movq %r9, %rcx
.align 16, 0x90
.LBB2_7: # %for.body8
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_6 Depth=2
# Child Loop BB2_3 Depth 3
leaq (%r10,%r10,2), %rcx
shlq $11, %rcx
addq %r9, %rcx
leaq (%rcx,%rdx,4), %r11
movl $0, (%rcx,%rdx,4)
xorps %xmm0, %xmm0
movl $2, %ecx
movq %rsi, %rdi
.p2align 4, 0x90
.LBB2_3: # %for.body8
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# => This Inner Loop Header: Depth=3
vmovss (%rcx), %xmm1
vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1
vaddss %xmm1, %xmm0, %xmm0
addq $4, %rcx
movss -8(%rax,%rcx,4), %xmm1 # xmm1 = mem[0],zero,zero,zero
mulss (%rdi), %xmm1
movss -4(%rax,%rcx,4), %xmm2 # xmm2 = mem[0],zero,zero,zero
addss %xmm0, %xmm1
mulss 6144(%rdi), %xmm2
addss %xmm1, %xmm2
movss (%rax,%rcx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
mulss 12288(%rdi), %xmm0
addss %xmm2, %xmm0
addq $3, %rcx
addq $18432, %rdi # imm = 0x4800
cmpq $1538, %rcx # imm = 0x602
jne .LBB2_3
# %bb.4: # %for.inc25
# in Loop: Header=BB2_2 Depth=2
movss %xmm0, (%r11)
addq $1, %rdx
addq $4, %rsi
cmpq $1536, %rdx # imm = 0x600
jne .LBB2_2
# %bb.5: # %for.inc28
# in Loop: Header=BB2_1 Depth=1
addq $1, %r10
addq $6144, %rax # imm = 0x1800
jne .LBB2_7
# BB#8: # %for.inc25
# in Loop: Header=BB2_6 Depth=2
vmovss %xmm0, (%rsi)
leaq C+4(%rdx,%rdi,4), %rsi
incq %rdi
cmpq $1536, %rdi # imm = 0x600
jne .LBB2_6
# BB#9: # %for.inc28
# in Loop: Header=BB2_5 Depth=1
addq $6144, %r9 # imm = 0x1800
incq %r8
cmpq $1536, %r8 # imm = 0x600
jne .LBB2_5
# BB#10: # %for.end30
cmpq $1536, %r10 # imm = 0x600
jne .LBB2_1
# %bb.6: # %for.end30
xorl %eax, %eax
popq %rbp
ret
.Ltmp22:
.size main, .Ltmp22-main
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size main, .Lfunc_end2-main
.cfi_endproc
# -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@ -265,10 +237,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits

View File

@ -1,385 +1,645 @@
.file "matmul.polly.interchanged+tiled+vector.ll"
.text
.file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
leaq B(%rip), %rax
leaq A(%rip), %rcx
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
.LBB0_1: # %polly.loop_preheader3
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
xorl %r9d, %r9d
.p2align 4, 0x90
.LBB0_1: # %polly.loop_header
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB0_2: # %polly.loop_header2
movl $1, %edi
xorl %edx, %edx
.p2align 4, 0x90
.LBB0_2: # %polly.loop_header1
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %r8d, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %r8, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
andl $1022, %esi # imm = 0x3FE
orl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, -4(%rcx,%rdi,4)
movss %xmm1, -4(%rax,%rdi,4)
leal (%r9,%rdx), %esi
andl $1023, %esi # imm = 0x3FF
addl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, (%rcx,%rdi,4)
movss %xmm1, (%rax,%rdi,4)
addq $2, %rdi
addl %r8d, %edx
cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
# BB#3: # %polly.loop_exit4
# %bb.3: # %polly.loop_exit3
# in Loop: Header=BB0_1 Depth=1
incq %r8
cmpq $1536, %r8 # imm = 0x600
addq $1, %r9
addq $6144, %rax # imm = 0x1800
addq $6144, %rcx # imm = 0x1800
addl $2, %r8d
cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
# BB#4: # %polly.loop_exit
# %bb.4: # %polly.exiting
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size init_array, .Lfunc_end0-init_array
.cfi_endproc
.globl print_array
.align 16, 0x90
# -- End function
.globl print_array # -- Begin function print_array
.p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
movq stdout(%rip), %rax
movq %r15, %r12
xorl %ebx, %ebx
.align 16, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
cmpl $79, %eax
jne .LBB1_4
# BB#3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp20:
.cfi_def_cfa_offset 16
.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp22:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $56, %rsp
.Ltmp23:
pushq %rax
.cfi_offset %rbx, -56
.Ltmp24:
.cfi_offset %r12, -48
.Ltmp25:
.cfi_offset %r13, -40
.Ltmp26:
.cfi_offset %r14, -32
.Ltmp27:
.cfi_offset %r15, -24
xorl %ebx, %ebx
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %ebx, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %rbx, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
incq %rbx
cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
# BB#4: # %polly.loop_preheader3.preheader
movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
xorl %esi, %esi
movl $C+16, %eax
movq %rax, -88(%rbp) # 8-byte Spill
.align 16, 0x90
.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
# Child Loop BB2_15 Depth 2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
movq %rsi, -56(%rbp) # 8-byte Spill
movq %rsi, %rax
orq $63, %rax
movq %rax, -72(%rbp) # 8-byte Spill
leaq -1(%rax), %rax
movq %rax, -48(%rbp) # 8-byte Spill
xorl %edx, %edx
.align 16, 0x90
.LBB2_15: # %polly.loop_preheader24
# Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
movq %rdx, -80(%rbp) # 8-byte Spill
leaq -4(%rdx), %rcx
movq %rdx, %rax
decq %rax
cmovsq %rcx, %rax
movq %rax, %r15
sarq $63, %r15
shrq $62, %r15
addq %rax, %r15
andq $-4, %r15
movq %rdx, %r13
orq $63, %r13
leaq -4(%r13), %rdx
xorl %r10d, %r10d
movq -88(%rbp), %rax # 8-byte Reload
leaq (%rax,%r15,4), %rax
movq %rax, -64(%rbp) # 8-byte Spill
leaq B+16(,%r15,4), %rbx
leaq 4(%r15), %r12
.align 16, 0x90
.LBB2_8: # %polly.loop_header23
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# => This Loop Header: Depth=3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
cmpq -72(%rbp), %rsi # 8-byte Folded Reload
jg .LBB2_13
# BB#9: # %polly.loop_header30.preheader
# in Loop: Header=BB2_8 Depth=3
movq %r10, %rax
orq $63, %rax
cmpq %rax, %r10
jg .LBB2_13
# BB#10: # in Loop: Header=BB2_8 Depth=3
decq %rax
movq -64(%rbp), %r14 # 8-byte Reload
movq -56(%rbp), %r11 # 8-byte Reload
.align 16, 0x90
.LBB2_11: # %polly.loop_header37.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# => This Loop Header: Depth=4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
cmpq %r13, %r12
movq %rbx, %r8
movq %r10, %rsi
jg .LBB2_12
.align 16, 0x90
.LBB2_17: # %polly.loop_header46.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# => This Loop Header: Depth=5
# Child Loop BB2_18 Depth 6
leaq (%r11,%r11,2), %rcx
shlq $11, %rcx
vbroadcastss A(%rcx,%rsi,4), %xmm0
movq %r14, %rdi
movq %r8, %r9
movq %r15, %rcx
.LBB2_18: # %polly.loop_header46
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# Parent Loop BB2_17 Depth=5
# => This Inner Loop Header: Depth=6
vmulps (%r9), %xmm0, %xmm1
vaddps (%rdi), %xmm1, %xmm1
vmovaps %xmm1, (%rdi)
addq $16, %rdi
addq $16, %r9
addq $4, %rcx
cmpq %rdx, %rcx
jle .LBB2_18
# BB#16: # %polly.loop_exit48
# in Loop: Header=BB2_17 Depth=5
addq $6144, %r8 # imm = 0x1800
cmpq %rax, %rsi
leaq 1(%rsi), %rsi
jle .LBB2_17
.align 16, 0x90
.LBB2_12: # %polly.loop_exit39
# in Loop: Header=BB2_11 Depth=4
addq $6144, %r14 # imm = 0x1800
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
leaq 1(%r11), %r11
jle .LBB2_11
.align 16, 0x90
.LBB2_13: # %polly.loop_exit32
# in Loop: Header=BB2_8 Depth=3
addq $393216, %rbx # imm = 0x60000
cmpq $1472, %r10 # imm = 0x5C0
leaq 64(%r10), %r10
movq -56(%rbp), %rsi # 8-byte Reload
jl .LBB2_8
# BB#14: # %polly.loop_exit25
# in Loop: Header=BB2_15 Depth=2
movq -80(%rbp), %rdx # 8-byte Reload
cmpq $1472, %rdx # imm = 0x5C0
leaq 64(%rdx), %rdx
jl .LBB2_15
# BB#6: # %polly.loop_exit18
# in Loop: Header=BB2_5 Depth=1
addq $393216, -88(%rbp) # 8-byte Folded Spill
# imm = 0x60000
cmpq $1472, %rsi # imm = 0x5C0
leaq 64(%rsi), %rsi
jl .LBB2_5
# BB#7: # %polly.loop_exit11
leaq C(%rip), %r13
xorl %eax, %eax
addq $56, %rsp
movl $3435973837, %r12d # imm = 0xCCCCCCCD
leaq .L.str(%rip), %r14
.p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
movq %rax, -48(%rbp) # 8-byte Spill
movq stdout(%rip), %rsi
xorl %ebx, %ebx
.p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ebx, %eax
imulq %r12, %rax
shrq $38, %rax
leal (%rax,%rax,4), %r15d
shll $4, %r15d
addl $79, %r15d
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
cvtss2sd %xmm0, %xmm0
movb $1, %al
movq %rsi, %rdi
movq %r14, %rsi
callq fprintf
cmpl %ebx, %r15d
jne .LBB1_4
# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
addq $1, %rbx
movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
callq fputc@PLT
movq -48(%rbp), %rax # 8-byte Reload
addq $1, %rax
addq $6144, %r13 # imm = 0x1800
cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
# %bb.6: # %for.end12
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.Ltmp28:
.size main, .Ltmp28-main
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size print_array, .Lfunc_end1-print_array
.cfi_endproc
# -- End function
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $264, %rsp # imm = 0x108
.cfi_offset %rbx, -56
.cfi_offset %r12, -48
.cfi_offset %r13, -40
.cfi_offset %r14, -32
.cfi_offset %r15, -24
callq init_array
leaq C(%rip), %rdi
xorl %eax, %eax
movq %rax, -48(%rbp) # 8-byte Spill
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset@PLT
movl $64, %eax
movq %rax, -80(%rbp) # 8-byte Spill
leaq A(%rip), %rax
movq %rax, -72(%rbp) # 8-byte Spill
.p2align 4, 0x90
.LBB2_1: # %polly.loop_header8
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
# Child Loop BB2_3 Depth 3
# Child Loop BB2_4 Depth 4
# Child Loop BB2_5 Depth 5
leaq B+192(%rip), %r9
xorl %edi, %edi
xorl %eax, %eax
.p2align 4, 0x90
.LBB2_2: # %polly.loop_header14
# Parent Loop BB2_1 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_3 Depth 3
# Child Loop BB2_4 Depth 4
# Child Loop BB2_5 Depth 5
movq %rax, -168(%rbp) # 8-byte Spill
movq %rdi, -176(%rbp) # 8-byte Spill
shlq $6, %rdi
leaq 16(%rdi), %rdx
leaq 32(%rdi), %rsi
leaq 48(%rdi), %rcx
movq -72(%rbp), %r12 # 8-byte Reload
movq %r9, -184(%rbp) # 8-byte Spill
xorl %eax, %eax
.p2align 4, 0x90
.LBB2_3: # %polly.loop_header20
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# => This Loop Header: Depth=3
# Child Loop BB2_4 Depth 4
# Child Loop BB2_5 Depth 5
movq %rax, -192(%rbp) # 8-byte Spill
movq %r12, -200(%rbp) # 8-byte Spill
movq -48(%rbp), %r14 # 8-byte Reload
.p2align 4, 0x90
.LBB2_4: # %polly.loop_header26
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# Parent Loop BB2_3 Depth=3
# => This Loop Header: Depth=4
# Child Loop BB2_5 Depth 5
leaq (%r14,%r14,2), %rbx
shlq $11, %rbx
leaq C(%rip), %rax
addq %rax, %rbx
leaq (%rbx,%rdi,4), %r8
leaq (%rbx,%rdx,4), %r15
leaq (%rbx,%rsi,4), %r10
leaq (%rbx,%rcx,4), %r11
movups (%rbx,%rdi,4), %xmm8
movups 16(%rbx,%rdi,4), %xmm0
movaps %xmm0, -144(%rbp) # 16-byte Spill
movups 32(%rbx,%rdi,4), %xmm6
movups 48(%rbx,%rdi,4), %xmm1
movups (%rbx,%rdx,4), %xmm15
movups 16(%rbx,%rdx,4), %xmm0
movaps %xmm0, -64(%rbp) # 16-byte Spill
movups 32(%rbx,%rdx,4), %xmm0
movaps %xmm0, -96(%rbp) # 16-byte Spill
movups 48(%rbx,%rdx,4), %xmm0
movaps %xmm0, -112(%rbp) # 16-byte Spill
movups (%rbx,%rsi,4), %xmm11
movups 16(%rbx,%rsi,4), %xmm0
movaps %xmm0, -160(%rbp) # 16-byte Spill
movups 32(%rbx,%rsi,4), %xmm12
movups 48(%rbx,%rsi,4), %xmm0
movaps %xmm0, -128(%rbp) # 16-byte Spill
movups (%rbx,%rcx,4), %xmm9
movups 16(%rbx,%rcx,4), %xmm13
movups 32(%rbx,%rcx,4), %xmm2
movups 48(%rbx,%rcx,4), %xmm3
movq %r9, %rbx
movl $0, %r13d
.p2align 4, 0x90
.LBB2_5: # %vector.ph
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# Parent Loop BB2_3 Depth=3
# Parent Loop BB2_4 Depth=4
# => This Inner Loop Header: Depth=5
movaps %xmm12, -240(%rbp) # 16-byte Spill
movaps %xmm2, -256(%rbp) # 16-byte Spill
movaps %xmm3, -272(%rbp) # 16-byte Spill
movaps %xmm8, %xmm10
movaps -144(%rbp), %xmm7 # 16-byte Reload
unpcklps %xmm7, %xmm10 # xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
movaps %xmm1, %xmm4
shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
shufps $36, %xmm4, %xmm10 # xmm10 = xmm10[0,1],xmm4[2,0]
movaps %xmm7, %xmm5
shufps $17, %xmm8, %xmm5 # xmm5 = xmm5[1,0],xmm8[1,0]
movaps %xmm6, %xmm4
unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
shufps $226, %xmm4, %xmm5 # xmm5 = xmm5[2,0],xmm4[2,3]
movaps %xmm8, %xmm12
unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
movaps %xmm1, %xmm4
shufps $34, %xmm6, %xmm4 # xmm4 = xmm4[2,0],xmm6[2,0]
shufps $36, %xmm4, %xmm12 # xmm12 = xmm12[0,1],xmm4[2,0]
shufps $51, %xmm8, %xmm7 # xmm7 = xmm7[3,0],xmm8[3,0]
unpckhps %xmm1, %xmm6 # xmm6 = xmm6[2],xmm1[2],xmm6[3],xmm1[3]
shufps $226, %xmm6, %xmm7 # xmm7 = xmm7[2,0],xmm6[2,3]
movaps -160(%rbx), %xmm0
movaps -144(%rbx), %xmm1
movaps %xmm1, %xmm6
shufps $0, %xmm0, %xmm6 # xmm6 = xmm6[0,0],xmm0[0,0]
movaps -192(%rbx), %xmm3
movaps -176(%rbx), %xmm4
movaps %xmm3, %xmm8
unpcklps %xmm4, %xmm8 # xmm8 = xmm8[0],xmm4[0],xmm8[1],xmm4[1]
shufps $36, %xmm6, %xmm8 # xmm8 = xmm8[0,1],xmm6[2,0]
movaps %xmm0, %xmm2
unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
movaps %xmm4, %xmm6
shufps $17, %xmm3, %xmm6 # xmm6 = xmm6[1,0],xmm3[1,0]
shufps $226, %xmm2, %xmm6 # xmm6 = xmm6[2,0],xmm2[2,3]
movaps %xmm1, %xmm2
shufps $34, %xmm0, %xmm2 # xmm2 = xmm2[2,0],xmm0[2,0]
movaps %xmm3, %xmm14
unpckhps %xmm4, %xmm14 # xmm14 = xmm14[2],xmm4[2],xmm14[3],xmm4[3]
shufps $36, %xmm2, %xmm14 # xmm14 = xmm14[0,1],xmm2[2,0]
unpckhps %xmm1, %xmm0 # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
shufps $51, %xmm3, %xmm4 # xmm4 = xmm4[3,0],xmm3[3,0]
shufps $226, %xmm0, %xmm4 # xmm4 = xmm4[2,0],xmm0[2,3]
movss (%r12,%r13,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
mulps %xmm0, %xmm8
addps %xmm10, %xmm8
mulps %xmm0, %xmm6
addps %xmm5, %xmm6
mulps %xmm0, %xmm14
addps %xmm12, %xmm14
mulps %xmm0, %xmm4
movaps %xmm0, %xmm5
addps %xmm7, %xmm4
movaps %xmm14, %xmm0
unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
movaps %xmm6, %xmm1
shufps $51, %xmm8, %xmm1 # xmm1 = xmm1[3,0],xmm8[3,0]
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
movaps %xmm1, -304(%rbp) # 16-byte Spill
movaps %xmm4, %xmm0
shufps $34, %xmm14, %xmm0 # xmm0 = xmm0[2,0],xmm14[2,0]
movaps %xmm8, %xmm1
unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
movaps %xmm1, -288(%rbp) # 16-byte Spill
movaps %xmm14, %xmm0
unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
movaps %xmm6, %xmm1
shufps $17, %xmm8, %xmm1 # xmm1 = xmm1[1,0],xmm8[1,0]
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
movaps %xmm1, -144(%rbp) # 16-byte Spill
shufps $0, %xmm14, %xmm4 # xmm4 = xmm4[0,0],xmm14[0,0]
unpcklps %xmm6, %xmm8 # xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
shufps $36, %xmm4, %xmm8 # xmm8 = xmm8[0,1],xmm4[2,0]
movaps %xmm15, %xmm14
movaps -64(%rbp), %xmm4 # 16-byte Reload
unpcklps %xmm4, %xmm14 # xmm14 = xmm14[0],xmm4[0],xmm14[1],xmm4[1]
movaps -112(%rbp), %xmm1 # 16-byte Reload
movaps %xmm1, %xmm0
movaps -96(%rbp), %xmm3 # 16-byte Reload
shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
shufps $36, %xmm0, %xmm14 # xmm14 = xmm14[0,1],xmm0[2,0]
movaps %xmm4, %xmm12
shufps $17, %xmm15, %xmm12 # xmm12 = xmm12[1,0],xmm15[1,0]
movaps %xmm3, %xmm2
unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
shufps $226, %xmm2, %xmm12 # xmm12 = xmm12[2,0],xmm2[2,3]
movaps %xmm15, %xmm7
unpckhps %xmm4, %xmm7 # xmm7 = xmm7[2],xmm4[2],xmm7[3],xmm4[3]
movaps %xmm1, %xmm2
shufps $34, %xmm3, %xmm2 # xmm2 = xmm2[2,0],xmm3[2,0]
shufps $36, %xmm2, %xmm7 # xmm7 = xmm7[0,1],xmm2[2,0]
shufps $51, %xmm15, %xmm4 # xmm4 = xmm4[3,0],xmm15[3,0]
unpckhps %xmm1, %xmm3 # xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
shufps $226, %xmm3, %xmm4 # xmm4 = xmm4[2,0],xmm3[2,3]
movaps %xmm4, -64(%rbp) # 16-byte Spill
movaps -96(%rbx), %xmm2
movaps -80(%rbx), %xmm1
movaps %xmm1, %xmm4
shufps $0, %xmm2, %xmm4 # xmm4 = xmm4[0,0],xmm2[0,0]
movaps -112(%rbx), %xmm10
movaps -128(%rbx), %xmm0
movaps %xmm0, %xmm15
unpcklps %xmm10, %xmm15 # xmm15 = xmm15[0],xmm10[0],xmm15[1],xmm10[1]
shufps $36, %xmm4, %xmm15 # xmm15 = xmm15[0,1],xmm4[2,0]
movaps %xmm2, %xmm4
unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
movaps %xmm10, %xmm6
shufps $17, %xmm0, %xmm6 # xmm6 = xmm6[1,0],xmm0[1,0]
shufps $226, %xmm4, %xmm6 # xmm6 = xmm6[2,0],xmm4[2,3]
movaps %xmm1, %xmm3
shufps $34, %xmm2, %xmm3 # xmm3 = xmm3[2,0],xmm2[2,0]
movaps %xmm0, %xmm4
unpckhps %xmm10, %xmm4 # xmm4 = xmm4[2],xmm10[2],xmm4[3],xmm10[3]
shufps $36, %xmm3, %xmm4 # xmm4 = xmm4[0,1],xmm3[2,0]
unpckhps %xmm1, %xmm2 # xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
shufps $51, %xmm0, %xmm10 # xmm10 = xmm10[3,0],xmm0[3,0]
shufps $226, %xmm2, %xmm10 # xmm10 = xmm10[2,0],xmm2[2,3]
movaps %xmm5, -224(%rbp) # 16-byte Spill
mulps %xmm5, %xmm15
addps %xmm14, %xmm15
mulps %xmm5, %xmm6
addps %xmm12, %xmm6
mulps %xmm5, %xmm4
addps %xmm7, %xmm4
mulps %xmm5, %xmm10
addps -64(%rbp), %xmm10 # 16-byte Folded Reload
movaps %xmm4, %xmm0
unpckhps %xmm10, %xmm0 # xmm0 = xmm0[2],xmm10[2],xmm0[3],xmm10[3]
movaps %xmm6, %xmm1
shufps $51, %xmm15, %xmm1 # xmm1 = xmm1[3,0],xmm15[3,0]
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
movaps %xmm1, -112(%rbp) # 16-byte Spill
movaps %xmm10, %xmm0
shufps $34, %xmm4, %xmm0 # xmm0 = xmm0[2,0],xmm4[2,0]
movaps %xmm15, %xmm1
unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
movaps %xmm1, -96(%rbp) # 16-byte Spill
movaps %xmm4, %xmm0
unpcklps %xmm10, %xmm0 # xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
movaps %xmm6, %xmm1
shufps $17, %xmm15, %xmm1 # xmm1 = xmm1[1,0],xmm15[1,0]
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
movaps %xmm1, -64(%rbp) # 16-byte Spill
shufps $0, %xmm4, %xmm10 # xmm10 = xmm10[0,0],xmm4[0,0]
unpcklps %xmm6, %xmm15 # xmm15 = xmm15[0],xmm6[0],xmm15[1],xmm6[1]
shufps $36, %xmm10, %xmm15 # xmm15 = xmm15[0,1],xmm10[2,0]
movaps %xmm11, %xmm10
movaps -160(%rbp), %xmm14 # 16-byte Reload
unpcklps %xmm14, %xmm10 # xmm10 = xmm10[0],xmm14[0],xmm10[1],xmm14[1]
movaps -128(%rbp), %xmm2 # 16-byte Reload
movaps %xmm2, %xmm0
movaps -240(%rbp), %xmm3 # 16-byte Reload
shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
movaps %xmm14, %xmm12
shufps $17, %xmm11, %xmm12 # xmm12 = xmm12[1,0],xmm11[1,0]
movaps %xmm3, %xmm0
unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
shufps $226, %xmm0, %xmm12 # xmm12 = xmm12[2,0],xmm0[2,3]
movaps %xmm11, %xmm0
unpckhps %xmm14, %xmm0 # xmm0 = xmm0[2],xmm14[2],xmm0[3],xmm14[3]
movaps %xmm2, %xmm1
shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
shufps $51, %xmm11, %xmm14 # xmm14 = xmm14[3,0],xmm11[3,0]
unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
shufps $226, %xmm3, %xmm14 # xmm14 = xmm14[2,0],xmm3[2,3]
movaps -32(%rbx), %xmm1
movaps -16(%rbx), %xmm2
movaps %xmm2, %xmm3
shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
movaps -48(%rbx), %xmm4
movaps -64(%rbx), %xmm5
movaps %xmm5, %xmm11
unpcklps %xmm4, %xmm11 # xmm11 = xmm11[0],xmm4[0],xmm11[1],xmm4[1]
shufps $36, %xmm3, %xmm11 # xmm11 = xmm11[0,1],xmm3[2,0]
movaps %xmm1, %xmm3
unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
movaps %xmm4, %xmm7
shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
movaps %xmm2, %xmm3
shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
movaps %xmm5, %xmm6
unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
movaps -224(%rbp), %xmm1 # 16-byte Reload
mulps %xmm1, %xmm11
addps %xmm10, %xmm11
mulps %xmm1, %xmm7
addps %xmm12, %xmm7
mulps %xmm1, %xmm6
addps %xmm0, %xmm6
mulps %xmm1, %xmm4
addps %xmm14, %xmm4
movaps %xmm6, %xmm0
unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
movaps %xmm7, %xmm1
shufps $51, %xmm11, %xmm1 # xmm1 = xmm1[3,0],xmm11[3,0]
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
movaps %xmm1, -128(%rbp) # 16-byte Spill
movaps %xmm4, %xmm0
shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
movaps %xmm11, %xmm12
unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
shufps $36, %xmm0, %xmm12 # xmm12 = xmm12[0,1],xmm0[2,0]
movaps %xmm6, %xmm0
unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
movaps %xmm7, %xmm1
shufps $17, %xmm11, %xmm1 # xmm1 = xmm1[1,0],xmm11[1,0]
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
movaps %xmm1, -160(%rbp) # 16-byte Spill
shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
unpcklps %xmm7, %xmm11 # xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1]
shufps $36, %xmm4, %xmm11 # xmm11 = xmm11[0,1],xmm4[2,0]
movaps %xmm9, %xmm10
unpcklps %xmm13, %xmm10 # xmm10 = xmm10[0],xmm13[0],xmm10[1],xmm13[1]
movaps -272(%rbp), %xmm2 # 16-byte Reload
movaps %xmm2, %xmm0
movaps -256(%rbp), %xmm3 # 16-byte Reload
shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
movaps %xmm13, %xmm14
shufps $17, %xmm9, %xmm14 # xmm14 = xmm14[1,0],xmm9[1,0]
movaps %xmm3, %xmm0
unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
shufps $226, %xmm0, %xmm14 # xmm14 = xmm14[2,0],xmm0[2,3]
movaps %xmm9, %xmm0
unpckhps %xmm13, %xmm0 # xmm0 = xmm0[2],xmm13[2],xmm0[3],xmm13[3]
movaps %xmm2, %xmm1
shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
shufps $51, %xmm9, %xmm13 # xmm13 = xmm13[3,0],xmm9[3,0]
unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
shufps $226, %xmm3, %xmm13 # xmm13 = xmm13[2,0],xmm3[2,3]
movaps 32(%rbx), %xmm1
movaps 48(%rbx), %xmm2
movaps %xmm2, %xmm3
shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
movaps 16(%rbx), %xmm4
movaps (%rbx), %xmm5
movaps %xmm5, %xmm9
unpcklps %xmm4, %xmm9 # xmm9 = xmm9[0],xmm4[0],xmm9[1],xmm4[1]
shufps $36, %xmm3, %xmm9 # xmm9 = xmm9[0,1],xmm3[2,0]
movaps %xmm1, %xmm3
unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
movaps %xmm4, %xmm7
shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
movaps %xmm2, %xmm3
shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
movaps %xmm5, %xmm6
unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
movaps -224(%rbp), %xmm1 # 16-byte Reload
mulps %xmm1, %xmm9
addps %xmm10, %xmm9
mulps %xmm1, %xmm7
addps %xmm14, %xmm7
mulps %xmm1, %xmm6
addps %xmm0, %xmm6
mulps %xmm1, %xmm4
addps %xmm13, %xmm4
movaps %xmm6, %xmm0
unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
movaps %xmm7, %xmm3
shufps $51, %xmm9, %xmm3 # xmm3 = xmm3[3,0],xmm9[3,0]
shufps $226, %xmm0, %xmm3 # xmm3 = xmm3[2,0],xmm0[2,3]
movaps %xmm4, %xmm0
shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
movaps %xmm9, %xmm2
unpckhps %xmm7, %xmm2 # xmm2 = xmm2[2],xmm7[2],xmm2[3],xmm7[3]
shufps $36, %xmm0, %xmm2 # xmm2 = xmm2[0,1],xmm0[2,0]
movaps %xmm6, %xmm0
unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
movaps %xmm7, %xmm13
shufps $17, %xmm9, %xmm13 # xmm13 = xmm13[1,0],xmm9[1,0]
shufps $226, %xmm0, %xmm13 # xmm13 = xmm13[2,0],xmm0[2,3]
shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
movaps -288(%rbp), %xmm6 # 16-byte Reload
movaps -304(%rbp), %xmm1 # 16-byte Reload
unpcklps %xmm7, %xmm9 # xmm9 = xmm9[0],xmm7[0],xmm9[1],xmm7[1]
shufps $36, %xmm4, %xmm9 # xmm9 = xmm9[0,1],xmm4[2,0]
addq $1, %r13
addq $6144, %rbx # imm = 0x1800
cmpq $64, %r13
jne .LBB2_5
# %bb.6: # %polly.loop_exit34
# in Loop: Header=BB2_4 Depth=4
movups %xmm8, (%r8)
movaps -144(%rbp), %xmm0 # 16-byte Reload
movups %xmm0, 16(%r8)
movups %xmm6, 32(%r8)
movups %xmm1, 48(%r8)
movaps -112(%rbp), %xmm0 # 16-byte Reload
movups %xmm0, 48(%r15)
movaps -96(%rbp), %xmm0 # 16-byte Reload
movups %xmm0, 32(%r15)
movaps -64(%rbp), %xmm0 # 16-byte Reload
movups %xmm0, 16(%r15)
movups %xmm15, (%r15)
movaps -128(%rbp), %xmm0 # 16-byte Reload
movups %xmm0, 48(%r10)
movaps -160(%rbp), %xmm0 # 16-byte Reload
movups %xmm0, 16(%r10)
movups %xmm11, (%r10)
movups %xmm12, 32(%r10)
movups %xmm3, 48(%r11)
movups %xmm13, 16(%r11)
movups %xmm9, (%r11)
movups %xmm2, 32(%r11)
addq $1, %r14
addq $6144, %r12 # imm = 0x1800
cmpq -80(%rbp), %r14 # 8-byte Folded Reload
jne .LBB2_4
# %bb.7: # %polly.loop_exit28
# in Loop: Header=BB2_3 Depth=3
movq -192(%rbp), %rax # 8-byte Reload
addq $64, %rax
addq $393216, %r9 # imm = 0x60000
movq -200(%rbp), %r12 # 8-byte Reload
addq $256, %r12 # imm = 0x100
cmpq $1536, %rax # imm = 0x600
jb .LBB2_3
# %bb.8: # %polly.loop_exit22
# in Loop: Header=BB2_2 Depth=2
movq -168(%rbp), %rax # 8-byte Reload
addq $64, %rax
movq -176(%rbp), %rdi # 8-byte Reload
addq $1, %rdi
movq -184(%rbp), %r9 # 8-byte Reload
addq $256, %r9 # imm = 0x100
cmpq $1536, %rax # imm = 0x600
jb .LBB2_2
# %bb.9: # %polly.loop_exit16
# in Loop: Header=BB2_1 Depth=1
movq -48(%rbp), %rax # 8-byte Reload
movq %rax, %rcx
addq $64, %rcx
addq $64, -80(%rbp) # 8-byte Folded Spill
addq $393216, -72(%rbp) # 8-byte Folded Spill
# imm = 0x60000
movq %rcx, %rax
movq %rcx, -48(%rbp) # 8-byte Spill
cmpq $1536, %rcx # imm = 0x600
jb .LBB2_1
# %bb.10: # %polly.exiting
xorl %eax, %eax
addq $264, %rsp # imm = 0x108
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size main, .Lfunc_end2-main
.cfi_endproc
# -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@ -387,10 +647,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits

View File

@ -1,379 +1,495 @@
.file "matmul.polly.interchanged+tiled.ll"
.text
.file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
leaq B(%rip), %rax
leaq A(%rip), %rcx
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
.LBB0_1: # %polly.loop_preheader3
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
xorl %r9d, %r9d
.p2align 4, 0x90
.LBB0_1: # %polly.loop_header
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB0_2: # %polly.loop_header2
movl $1, %edi
xorl %edx, %edx
.p2align 4, 0x90
.LBB0_2: # %polly.loop_header1
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %r8d, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %r8, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
andl $1022, %esi # imm = 0x3FE
orl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, -4(%rcx,%rdi,4)
movss %xmm1, -4(%rax,%rdi,4)
leal (%r9,%rdx), %esi
andl $1023, %esi # imm = 0x3FF
addl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, (%rcx,%rdi,4)
movss %xmm1, (%rax,%rdi,4)
addq $2, %rdi
addl %r8d, %edx
cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
# BB#3: # %polly.loop_exit4
# %bb.3: # %polly.loop_exit3
# in Loop: Header=BB0_1 Depth=1
incq %r8
cmpq $1536, %r8 # imm = 0x600
addq $1, %r9
addq $6144, %rax # imm = 0x1800
addq $6144, %rcx # imm = 0x1800
addl $2, %r8d
cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
# BB#4: # %polly.loop_exit
# %bb.4: # %polly.exiting
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size init_array, .Lfunc_end0-init_array
.cfi_endproc
.globl print_array
.align 16, 0x90
# -- End function
.globl print_array # -- Begin function print_array
.p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
movq stdout(%rip), %rax
movq %r15, %r12
xorl %ebx, %ebx
.align 16, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
cmpl $79, %eax
jne .LBB1_4
# BB#3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
pushq %rbp
.Ltmp20:
.cfi_def_cfa_offset 16
.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp22:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $56, %rsp
.Ltmp23:
pushq %rax
.cfi_offset %rbx, -56
.Ltmp24:
.cfi_offset %r12, -48
.Ltmp25:
.cfi_offset %r13, -40
.Ltmp26:
.cfi_offset %r14, -32
.Ltmp27:
.cfi_offset %r15, -24
leaq C(%rip), %r13
xorl %eax, %eax
movl $3435973837, %r12d # imm = 0xCCCCCCCD
leaq .L.str(%rip), %r14
.p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
movq %rax, -48(%rbp) # 8-byte Spill
movq stdout(%rip), %rsi
xorl %ebx, %ebx
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
.p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %ebx, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %rbx, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
incq %rbx
movl %ebx, %eax
imulq %r12, %rax
shrq $38, %rax
leal (%rax,%rax,4), %r15d
shll $4, %r15d
addl $79, %r15d
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
cvtss2sd %xmm0, %xmm0
movb $1, %al
movq %rsi, %rdi
movq %r14, %rsi
callq fprintf
cmpl %ebx, %r15d
jne .LBB1_4
# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
addq $1, %rbx
movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
# BB#4: # %polly.loop_preheader3.preheader
movl $C, %ebx
movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
xorl %eax, %eax
.align 16, 0x90
.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
# Child Loop BB2_15 Depth 2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
movq %rax, -56(%rbp) # 8-byte Spill
movq %rbx, -88(%rbp) # 8-byte Spill
movq %rax, %rcx
orq $63, %rcx
movq %rcx, -72(%rbp) # 8-byte Spill
leaq -1(%rcx), %rcx
movq %rcx, -48(%rbp) # 8-byte Spill
movq $-1, %r15
movl $B, %ecx
movq %rbx, -64(%rbp) # 8-byte Spill
xorl %r12d, %r12d
.align 16, 0x90
.LBB2_15: # %polly.loop_preheader24
# Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_8 Depth 3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
movq %rcx, -80(%rbp) # 8-byte Spill
movq %r12, %r13
orq $63, %r13
leaq -1(%r13), %rbx
xorl %r9d, %r9d
movq %rcx, %rdx
.align 16, 0x90
.LBB2_8: # %polly.loop_header23
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# => This Loop Header: Depth=3
# Child Loop BB2_11 Depth 4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
cmpq -72(%rbp), %rax # 8-byte Folded Reload
jg .LBB2_13
# BB#9: # %polly.loop_header30.preheader
# in Loop: Header=BB2_8 Depth=3
movq %r9, %rax
orq $63, %rax
cmpq %rax, %r9
jg .LBB2_13
# BB#10: # in Loop: Header=BB2_8 Depth=3
decq %rax
movq -64(%rbp), %r10 # 8-byte Reload
movq -56(%rbp), %r11 # 8-byte Reload
.align 16, 0x90
.LBB2_11: # %polly.loop_header37.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# => This Loop Header: Depth=4
# Child Loop BB2_17 Depth 5
# Child Loop BB2_18 Depth 6
cmpq %r13, %r12
movq %rdx, %r14
movq %r9, %rcx
jg .LBB2_12
.align 16, 0x90
.LBB2_17: # %polly.loop_header46.preheader
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# => This Loop Header: Depth=5
# Child Loop BB2_18 Depth 6
leaq (%r11,%r11,2), %rsi
shlq $11, %rsi
vmovss A(%rsi,%rcx,4), %xmm0
movq %r10, %rdi
movq %r14, %r8
movq %r15, %rsi
.LBB2_18: # %polly.loop_header46
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_15 Depth=2
# Parent Loop BB2_8 Depth=3
# Parent Loop BB2_11 Depth=4
# Parent Loop BB2_17 Depth=5
# => This Inner Loop Header: Depth=6
vmulss (%r8), %xmm0, %xmm1
vaddss (%rdi), %xmm1, %xmm1
vmovss %xmm1, (%rdi)
addq $4, %rdi
addq $4, %r8
incq %rsi
cmpq %rbx, %rsi
jle .LBB2_18
# BB#16: # %polly.loop_exit48
# in Loop: Header=BB2_17 Depth=5
addq $6144, %r14 # imm = 0x1800
cmpq %rax, %rcx
leaq 1(%rcx), %rcx
jle .LBB2_17
.align 16, 0x90
.LBB2_12: # %polly.loop_exit39
# in Loop: Header=BB2_11 Depth=4
addq $6144, %r10 # imm = 0x1800
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
leaq 1(%r11), %r11
jle .LBB2_11
.align 16, 0x90
.LBB2_13: # %polly.loop_exit32
# in Loop: Header=BB2_8 Depth=3
addq $393216, %rdx # imm = 0x60000
cmpq $1472, %r9 # imm = 0x5C0
leaq 64(%r9), %r9
movq -56(%rbp), %rax # 8-byte Reload
jl .LBB2_8
# BB#14: # %polly.loop_exit25
# in Loop: Header=BB2_15 Depth=2
addq $256, -64(%rbp) # 8-byte Folded Spill
# imm = 0x100
movq -80(%rbp), %rcx # 8-byte Reload
addq $256, %rcx # imm = 0x100
addq $64, %r15
cmpq $1472, %r12 # imm = 0x5C0
leaq 64(%r12), %r12
jl .LBB2_15
# BB#6: # %polly.loop_exit18
# in Loop: Header=BB2_5 Depth=1
movq -88(%rbp), %rbx # 8-byte Reload
addq $393216, %rbx # imm = 0x60000
cmpq $1472, %rax # imm = 0x5C0
leaq 64(%rax), %rax
jl .LBB2_5
# BB#7: # %polly.loop_exit11
xorl %eax, %eax
addq $56, %rsp
jne .LBB1_2
# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
callq fputc@PLT
movq -48(%rbp), %rax # 8-byte Reload
addq $1, %rax
addq $6144, %r13 # imm = 0x1800
cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
# %bb.6: # %for.end12
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.Ltmp28:
.size main, .Ltmp28-main
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size print_array, .Lfunc_end1-print_array
.cfi_endproc
# -- End function
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $344, %rsp # imm = 0x158
.cfi_offset %rbx, -56
.cfi_offset %r12, -48
.cfi_offset %r13, -40
.cfi_offset %r14, -32
.cfi_offset %r15, -24
callq init_array
leaq C(%rip), %rdi
xorl %eax, %eax
movq %rax, -48(%rbp) # 8-byte Spill
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset@PLT
movl $64, %eax
movq %rax, -64(%rbp) # 8-byte Spill
leaq A(%rip), %rax
movq %rax, -56(%rbp) # 8-byte Spill
.p2align 4, 0x90
.LBB2_1: # %polly.loop_header8
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
# Child Loop BB2_3 Depth 3
# Child Loop BB2_4 Depth 4
# Child Loop BB2_5 Depth 5
leaq B+240(%rip), %rax
xorl %edi, %edi
.p2align 4, 0x90
.LBB2_2: # %polly.loop_header14
# Parent Loop BB2_1 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_3 Depth 3
# Child Loop BB2_4 Depth 4
# Child Loop BB2_5 Depth 5
movq %rdi, %rcx
orq $4, %rcx
movq %rcx, -80(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $8, %rcx
movq %rcx, -264(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $12, %rcx
movq %rcx, -256(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $16, %rcx
movq %rcx, -248(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $20, %rcx
movq %rcx, -240(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $24, %rcx
movq %rcx, -232(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $28, %rcx
movq %rcx, -224(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $32, %rcx
movq %rcx, -216(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $36, %rcx
movq %rcx, -208(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $40, %rcx
movq %rcx, -200(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $44, %rcx
movq %rcx, -192(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $48, %rcx
movq %rcx, -184(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $52, %rcx
movq %rcx, -176(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $56, %rcx
movq %rcx, -168(%rbp) # 8-byte Spill
movq %rdi, %rcx
orq $60, %rcx
movq %rcx, -160(%rbp) # 8-byte Spill
movq -56(%rbp), %rdx # 8-byte Reload
movq %rax, -136(%rbp) # 8-byte Spill
movq %rax, -72(%rbp) # 8-byte Spill
xorl %eax, %eax
movq %rdi, -272(%rbp) # 8-byte Spill
.p2align 4, 0x90
.LBB2_3: # %polly.loop_header20
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# => This Loop Header: Depth=3
# Child Loop BB2_4 Depth 4
# Child Loop BB2_5 Depth 5
movq %rax, -144(%rbp) # 8-byte Spill
movq %rdx, -152(%rbp) # 8-byte Spill
movq -48(%rbp), %rax # 8-byte Reload
.p2align 4, 0x90
.LBB2_4: # %polly.loop_header26
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# Parent Loop BB2_3 Depth=3
# => This Loop Header: Depth=4
# Child Loop BB2_5 Depth 5
movq %rax, -376(%rbp) # 8-byte Spill
leaq (%rax,%rax,2), %rax
shlq $11, %rax
leaq C(%rip), %rsi
addq %rsi, %rax
leaq (%rax,%rdi,4), %rcx
movq %rcx, -368(%rbp) # 8-byte Spill
movq -80(%rbp), %rcx # 8-byte Reload
leaq (%rax,%rcx,4), %rcx
movq %rcx, -360(%rbp) # 8-byte Spill
movq -264(%rbp), %rbx # 8-byte Reload
leaq (%rax,%rbx,4), %rcx
movq %rcx, -352(%rbp) # 8-byte Spill
movq -256(%rbp), %r8 # 8-byte Reload
movq %rdi, %rsi
leaq (%rax,%r8,4), %rdi
movq %rdi, -344(%rbp) # 8-byte Spill
movq -248(%rbp), %rdi # 8-byte Reload
leaq (%rax,%rdi,4), %rcx
movq %rcx, -336(%rbp) # 8-byte Spill
movq -240(%rbp), %r9 # 8-byte Reload
leaq (%rax,%r9,4), %rcx
movq %rcx, -328(%rbp) # 8-byte Spill
movq -232(%rbp), %r10 # 8-byte Reload
leaq (%rax,%r10,4), %rcx
movq %rcx, -320(%rbp) # 8-byte Spill
movq -224(%rbp), %r14 # 8-byte Reload
leaq (%rax,%r14,4), %rcx
movq %rcx, -312(%rbp) # 8-byte Spill
movq -216(%rbp), %r15 # 8-byte Reload
leaq (%rax,%r15,4), %rcx
movq %rcx, -304(%rbp) # 8-byte Spill
movq -208(%rbp), %r12 # 8-byte Reload
leaq (%rax,%r12,4), %rcx
movq %rcx, -296(%rbp) # 8-byte Spill
movq -200(%rbp), %r13 # 8-byte Reload
leaq (%rax,%r13,4), %rcx
movq %rcx, -288(%rbp) # 8-byte Spill
movq -192(%rbp), %r11 # 8-byte Reload
leaq (%rax,%r11,4), %rcx
movq %rcx, -280(%rbp) # 8-byte Spill
movaps (%rax,%rsi,4), %xmm15
movq -80(%rbp), %rcx # 8-byte Reload
movaps (%rax,%rcx,4), %xmm14
movaps (%rax,%rbx,4), %xmm13
movaps (%rax,%r8,4), %xmm12
movaps (%rax,%rdi,4), %xmm11
movaps (%rax,%r9,4), %xmm10
movaps (%rax,%r10,4), %xmm9
movaps (%rax,%r14,4), %xmm8
movaps (%rax,%r15,4), %xmm7
movaps (%rax,%r12,4), %xmm6
movaps (%rax,%r13,4), %xmm5
movaps (%rax,%r11,4), %xmm4
movq -184(%rbp), %rcx # 8-byte Reload
movaps (%rax,%rcx,4), %xmm3
movq -176(%rbp), %rsi # 8-byte Reload
movaps (%rax,%rsi,4), %xmm0
movaps %xmm0, -96(%rbp) # 16-byte Spill
movq -168(%rbp), %rbx # 8-byte Reload
movaps (%rax,%rbx,4), %xmm0
movaps %xmm0, -112(%rbp) # 16-byte Spill
movq -160(%rbp), %rdi # 8-byte Reload
movaps (%rax,%rdi,4), %xmm0
movaps %xmm0, -128(%rbp) # 16-byte Spill
leaq (%rax,%rcx,4), %r8
leaq (%rax,%rsi,4), %rcx
leaq (%rax,%rbx,4), %rsi
leaq (%rax,%rdi,4), %rax
movq -72(%rbp), %r9 # 8-byte Reload
movl $0, %r10d
.p2align 4, 0x90
.LBB2_5: # %vector.ph
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# Parent Loop BB2_3 Depth=3
# Parent Loop BB2_4 Depth=4
# => This Inner Loop Header: Depth=5
movss (%rdx,%r10,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
movaps -240(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm15
movaps -224(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm14
movaps -208(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm13
movaps -192(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm12
movaps -176(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm11
movaps -160(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm10
movaps -144(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm9
movaps -128(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm8
movaps -112(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm7
movaps -96(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm6
movaps -80(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm5
movaps -64(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm4
movaps -48(%r9), %xmm1
mulps %xmm0, %xmm1
addps %xmm1, %xmm3
movaps -32(%r9), %xmm1
mulps %xmm0, %xmm1
movaps -96(%rbp), %xmm2 # 16-byte Reload
addps %xmm1, %xmm2
movaps %xmm2, -96(%rbp) # 16-byte Spill
movaps -16(%r9), %xmm1
mulps %xmm0, %xmm1
movaps -112(%rbp), %xmm2 # 16-byte Reload
addps %xmm1, %xmm2
movaps %xmm2, -112(%rbp) # 16-byte Spill
mulps (%r9), %xmm0
movaps -128(%rbp), %xmm1 # 16-byte Reload
addps %xmm0, %xmm1
movaps %xmm1, -128(%rbp) # 16-byte Spill
addq $1, %r10
addq $6144, %r9 # imm = 0x1800
cmpq $64, %r10
jne .LBB2_5
# %bb.6: # %polly.loop_exit34
# in Loop: Header=BB2_4 Depth=4
movq -368(%rbp), %rdi # 8-byte Reload
movaps %xmm15, (%rdi)
movq -360(%rbp), %rdi # 8-byte Reload
movaps %xmm14, (%rdi)
movq -352(%rbp), %rdi # 8-byte Reload
movaps %xmm13, (%rdi)
movq -344(%rbp), %rdi # 8-byte Reload
movaps %xmm12, (%rdi)
movq -336(%rbp), %rdi # 8-byte Reload
movaps %xmm11, (%rdi)
movq -328(%rbp), %rdi # 8-byte Reload
movaps %xmm10, (%rdi)
movq -320(%rbp), %rdi # 8-byte Reload
movaps %xmm9, (%rdi)
movq -312(%rbp), %rdi # 8-byte Reload
movaps %xmm8, (%rdi)
movq -304(%rbp), %rdi # 8-byte Reload
movaps %xmm7, (%rdi)
movq -296(%rbp), %rdi # 8-byte Reload
movaps %xmm6, (%rdi)
movq -288(%rbp), %rdi # 8-byte Reload
movaps %xmm5, (%rdi)
movq -280(%rbp), %rdi # 8-byte Reload
movaps %xmm4, (%rdi)
movaps %xmm3, (%r8)
movaps -96(%rbp), %xmm0 # 16-byte Reload
movaps %xmm0, (%rcx)
movaps -112(%rbp), %xmm0 # 16-byte Reload
movaps %xmm0, (%rsi)
movaps -128(%rbp), %xmm0 # 16-byte Reload
movaps %xmm0, (%rax)
movq -376(%rbp), %rax # 8-byte Reload
addq $1, %rax
addq $6144, %rdx # imm = 0x1800
cmpq -64(%rbp), %rax # 8-byte Folded Reload
movq -272(%rbp), %rdi # 8-byte Reload
jne .LBB2_4
# %bb.7: # %polly.loop_exit28
# in Loop: Header=BB2_3 Depth=3
movq -144(%rbp), %rax # 8-byte Reload
addq $64, %rax
addq $393216, -72(%rbp) # 8-byte Folded Spill
# imm = 0x60000
movq -152(%rbp), %rdx # 8-byte Reload
addq $256, %rdx # imm = 0x100
cmpq $1536, %rax # imm = 0x600
jb .LBB2_3
# %bb.8: # %polly.loop_exit22
# in Loop: Header=BB2_2 Depth=2
addq $64, %rdi
movq -136(%rbp), %rax # 8-byte Reload
addq $256, %rax # imm = 0x100
cmpq $1536, %rdi # imm = 0x600
jb .LBB2_2
# %bb.9: # %polly.loop_exit16
# in Loop: Header=BB2_1 Depth=1
movq -48(%rbp), %rax # 8-byte Reload
movq %rax, %rcx
addq $64, %rcx
addq $64, -64(%rbp) # 8-byte Folded Spill
addq $393216, -56(%rbp) # 8-byte Folded Spill
# imm = 0x60000
movq %rcx, %rax
movq %rcx, -48(%rbp) # 8-byte Spill
cmpq $1536, %rcx # imm = 0x600
jb .LBB2_1
# %bb.10: # %polly.exiting
xorl %eax, %eax
addq $344, %rsp # imm = 0x158
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size main, .Lfunc_end2-main
.cfi_endproc
# -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@ -381,10 +497,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits

View File

@ -1,275 +1,248 @@
.file "matmul.polly.interchanged.ll"
.text
.file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
.align 8
.p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
leaq B(%rip), %rax
leaq A(%rip), %rcx
xorl %r8d, %r8d
vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
.LBB0_1: # %polly.loop_preheader3
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
xorl %r9d, %r9d
.p2align 4, 0x90
.LBB0_1: # %polly.loop_header
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB0_2: # %polly.loop_header2
movl $1, %edi
xorl %edx, %edx
.p2align 4, 0x90
.LBB0_2: # %polly.loop_header1
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %r8d, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %r8, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
andl $1022, %esi # imm = 0x3FE
orl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, -4(%rcx,%rdi,4)
movss %xmm1, -4(%rax,%rdi,4)
leal (%r9,%rdx), %esi
andl $1023, %esi # imm = 0x3FF
addl $1, %esi
xorps %xmm1, %xmm1
cvtsi2sdl %esi, %xmm1
mulsd %xmm0, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, (%rcx,%rdi,4)
movss %xmm1, (%rax,%rdi,4)
addq $2, %rdi
addl %r8d, %edx
cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
# BB#3: # %polly.loop_exit4
# %bb.3: # %polly.loop_exit3
# in Loop: Header=BB0_1 Depth=1
incq %r8
cmpq $1536, %r8 # imm = 0x600
addq $1, %r9
addq $6144, %rax # imm = 0x1800
addq $6144, %rcx # imm = 0x1800
addl $2, %r8d
cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
# BB#4: # %polly.loop_exit
# %bb.4: # %polly.exiting
popq %rbp
ret
.Ltmp5:
.size init_array, .Ltmp5-init_array
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size init_array, .Lfunc_end0-init_array
.cfi_endproc
.globl print_array
.align 16, 0x90
# -- End function
.globl print_array # -- Begin function print_array
.p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp9:
.cfi_def_cfa_offset 16
.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
.Ltmp12:
.cfi_offset %rbx, -48
.Ltmp13:
.cfi_offset %r12, -40
.Ltmp14:
pushq %rax
.cfi_offset %rbx, -56
.cfi_offset %r12, -48
.cfi_offset %r13, -40
.cfi_offset %r14, -32
.Ltmp15:
.cfi_offset %r15, -24
xorl %r14d, %r14d
movl $C, %r15d
.align 16, 0x90
leaq C(%rip), %r13
xorl %eax, %eax
movl $3435973837, %r12d # imm = 0xCCCCCCCD
leaq .L.str(%rip), %r14
.p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
movq stdout(%rip), %rax
movq %r15, %r12
movq %rax, -48(%rbp) # 8-byte Spill
movq stdout(%rip), %rsi
xorl %ebx, %ebx
.align 16, 0x90
.p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
vmovss (%r12), %xmm0
vcvtss2sd %xmm0, %xmm0, %xmm0
movq %rax, %rdi
movl $.L.str, %esi
movl %ebx, %eax
imulq %r12, %rax
shrq $38, %rax
leal (%rax,%rax,4), %r15d
shll $4, %r15d
addl $79, %r15d
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
cvtss2sd %xmm0, %xmm0
movb $1, %al
movq %rsi, %rdi
movq %r14, %rsi
callq fprintf
movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
sarq $37, %rcx
addl %edx, %ecx
imull $80, %ecx, %ecx
subl %ecx, %eax
cmpl $79, %eax
cmpl %ebx, %r15d
jne .LBB1_4
# BB#3: # %if.then
# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
addq $4, %r12
incq %rbx
movq stdout(%rip), %rax
addq $1, %rbx
movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
# BB#5: # %for.end
# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
movq %rax, %rsi
callq fputc
addq $6144, %r15 # imm = 0x1800
incq %r14
cmpq $1536, %r14 # imm = 0x600
callq fputc@PLT
movq -48(%rbp), %rax # 8-byte Reload
addq $1, %rax
addq $6144, %r13 # imm = 0x1800
cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
# BB#6: # %for.end12
# %bb.6: # %for.end12
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.Ltmp16:
.size print_array, .Ltmp16-print_array
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size print_array, .Lfunc_end1-print_array
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
.quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
# -- End function
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0: # %entry
# %bb.0: # %entry
pushq %rbp
.Ltmp20:
.cfi_def_cfa_offset 16
.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp22:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
.Ltmp23:
.cfi_offset %rbx, -32
.Ltmp24:
.cfi_offset %r14, -24
xorl %ebx, %ebx
vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
xorl %ecx, %ecx
.align 16, 0x90
.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
movl %ecx, %edx
imull %ebx, %edx
movl %edx, %esi
sarl $31, %esi
shrl $22, %esi
addl %edx, %esi
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
negl %esi
movq %rbx, %rax
shlq $11, %rax
leal 1(%rdx,%rsi), %edi
leaq (%rax,%rax,2), %rsi
leaq 1(%rcx), %rdx
cmpq $1536, %rdx # imm = 0x600
vcvtsi2sdl %edi, %xmm0, %xmm1
vmulsd %xmm0, %xmm1, %xmm1
vcvtsd2ss %xmm1, %xmm1, %xmm1
vmovss %xmm1, A(%rsi,%rcx,4)
vmovss %xmm1, B(%rsi,%rcx,4)
movq %rdx, %rcx
jne .LBB2_2
# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
incq %rbx
cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
# BB#4: # %polly.loop_preheader3.preheader
movl $C, %r14d
movl $C, %edi
callq init_array
leaq C(%rip), %rbx
xorl %r14d, %r14d
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
xorl %eax, %eax
.align 16, 0x90
.LBB2_5: # %polly.loop_preheader17
movq %rbx, %rdi
callq memset@PLT
leaq B(%rip), %rax
leaq A(%rip), %rcx
.p2align 4, 0x90
.LBB2_1: # %polly.loop_header8
# =>This Loop Header: Depth=1
# Child Loop BB2_10 Depth 2
# Child Loop BB2_8 Depth 3
movl $B, %ebx
xorl %edx, %edx
.align 16, 0x90
.LBB2_10: # %polly.loop_preheader24
# Parent Loop BB2_5 Depth=1
# Child Loop BB2_2 Depth 2
# Child Loop BB2_3 Depth 3
movq %rax, %rdx
xorl %esi, %esi
.p2align 4, 0x90
.LBB2_2: # %polly.loop_header14
# Parent Loop BB2_1 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB2_8 Depth 3
leaq (%rax,%rax,2), %rcx
shlq $11, %rcx
vmovss A(%rcx,%rdx,4), %xmm0
movl $1536, %esi # imm = 0x600
movq %r14, %rdi
movq %rbx, %rcx
.align 16, 0x90
.LBB2_8: # %polly.loop_header23
# Parent Loop BB2_5 Depth=1
# Parent Loop BB2_10 Depth=2
# Child Loop BB2_3 Depth 3
leaq (%r14,%r14,2), %rdi
shlq $11, %rdi
addq %rcx, %rdi
movss (%rdi,%rsi,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
movl $12, %edi
.p2align 4, 0x90
.LBB2_3: # %vector.body
# Parent Loop BB2_1 Depth=1
# Parent Loop BB2_2 Depth=2
# => This Inner Loop Header: Depth=3
vmulss (%rcx), %xmm0, %xmm1
vaddss (%rdi), %xmm1, %xmm1
vmovss %xmm1, (%rdi)
addq $4, %rdi
addq $4, %rcx
decq %rsi
jne .LBB2_8
# BB#9: # %polly.loop_exit25
# in Loop: Header=BB2_10 Depth=2
movaps -48(%rdx,%rdi,4), %xmm1
mulps %xmm0, %xmm1
movaps -32(%rdx,%rdi,4), %xmm2
mulps %xmm0, %xmm2
addps -48(%rbx,%rdi,4), %xmm1
addps -32(%rbx,%rdi,4), %xmm2
movaps %xmm1, -48(%rbx,%rdi,4)
movaps %xmm2, -32(%rbx,%rdi,4)
movaps -16(%rdx,%rdi,4), %xmm1
mulps %xmm0, %xmm1
movaps (%rdx,%rdi,4), %xmm2
mulps %xmm0, %xmm2
addps -16(%rbx,%rdi,4), %xmm1
addps (%rbx,%rdi,4), %xmm2
movaps %xmm1, -16(%rbx,%rdi,4)
movaps %xmm2, (%rbx,%rdi,4)
addq $16, %rdi
cmpq $1548, %rdi # imm = 0x60C
jne .LBB2_3
# %bb.4: # %polly.loop_exit22
# in Loop: Header=BB2_2 Depth=2
addq $1, %rsi
addq $6144, %rdx # imm = 0x1800
cmpq $1536, %rsi # imm = 0x600
jne .LBB2_2
# %bb.5: # %polly.loop_exit16
# in Loop: Header=BB2_1 Depth=1
addq $1, %r14
addq $6144, %rbx # imm = 0x1800
incq %rdx
cmpq $1536, %rdx # imm = 0x600
jne .LBB2_10
# BB#6: # %polly.loop_exit18
# in Loop: Header=BB2_5 Depth=1
addq $6144, %r14 # imm = 0x1800
incq %rax
cmpq $1536, %rax # imm = 0x600
jne .LBB2_5
# BB#7: # %polly.loop_exit11
cmpq $1536, %r14 # imm = 0x600
jne .LBB2_1
# %bb.6: # %polly.exiting
xorl %eax, %eax
popq %rbx
popq %r14
popq %rbp
ret
.Ltmp25:
.size main, .Ltmp25-main
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size main, .Lfunc_end2-main
.cfi_endproc
# -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@ -277,10 +250,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%lf "
.asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits

View File

@ -1,4 +1,4 @@
; ModuleID = 'matmul.s'
; ModuleID = 'matmul.ll'
source_filename = "matmul.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@ -6,15 +6,15 @@ target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external global %struct._IO_FILE*, align 8
@A = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external dso_local global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
@C = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
; Function Attrs: nounwind uwtable
define void @init_array() #0 {
; Function Attrs: noinline nounwind uwtable
define dso_local void @init_array() #0 {
entry:
br label %entry.split
@ -22,44 +22,37 @@ entry.split: ; preds = %entry
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry.split, %for.inc17
%indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]
%indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.inc17 ]
br label %for.body3
for.body3: ; preds = %for.cond1.preheader, %for.body3
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
%0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
%0 = mul nuw nsw i64 %indvars.iv, %indvars.iv4
%1 = trunc i64 %0 to i32
%rem = srem i32 %1, 1024
%add = add nsw i32 %rem, 1
%rem = and i32 %1, 1023
%add = add nuw nsw i32 %rem, 1
%conv = sitofp i32 %add to double
%div = fmul double %conv, 5.000000e-01
%conv4 = fptrunc double %div to float
%arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv
%arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv4, i64 %indvars.iv
store float %conv4, float* %arrayidx6, align 4
%2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
%3 = trunc i64 %2 to i32
%rem8 = srem i32 %3, 1024
%add9 = add nsw i32 %rem8, 1
%conv10 = sitofp i32 %add9 to double
%div11 = fmul double %conv10, 5.000000e-01
%conv12 = fptrunc double %div11 to float
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv
store float %conv12, float* %arrayidx16, align 4
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv
store float %conv4, float* %arrayidx16, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1536
br i1 %exitcond, label %for.body3, label %for.inc17
for.inc17: ; preds = %for.body3
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
%exitcond7 = icmp ne i64 %indvars.iv.next6, 1536
br i1 %exitcond7, label %for.cond1.preheader, label %for.end19
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
%exitcond6 = icmp ne i64 %indvars.iv.next5, 1536
br i1 %exitcond6, label %for.cond1.preheader, label %for.end19
for.end19: ; preds = %for.inc17
ret void
}
; Function Attrs: nounwind uwtable
define void @print_array() #0 {
; Function Attrs: noinline nounwind uwtable
define dso_local void @print_array() #0 {
entry:
br label %entry.split
@ -79,7 +72,7 @@ for.body3: ; preds = %for.cond1.preheader
%conv = fpext float %2 to double
%call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2
%3 = trunc i64 %indvars.iv to i32
%rem = srem i32 %3, 80
%rem = urem i32 %3, 80
%cmp6 = icmp eq i32 %rem, 79
br i1 %cmp6, label %if.then, label %for.inc
@ -105,10 +98,10 @@ for.end12: ; preds = %for.end
ret void
}
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
; Function Attrs: nounwind uwtable
define i32 @main() #0 {
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 {
entry:
br label %entry.split
@ -128,16 +121,14 @@ for.body3: ; preds = %for.cond1.preheader
for.body8: ; preds = %for.body3, %for.body8
%indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
%arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
%0 = load float, float* %arrayidx12, align 4
%0 = load float, float* %arrayidx5, align 4
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv
%1 = load float, float* %arrayidx16, align 4
%arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4
%2 = load float, float* %arrayidx20, align 4
%mul = fmul float %1, %2
%add = fadd float %0, %mul
%arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
store float %add, float* %arrayidx24, align 4
store float %add, float* %arrayidx5, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1536
br i1 %exitcond, label %for.body8, label %for.inc25
@ -162,10 +153,12 @@ declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #2
; Function Attrs: nounwind
declare i32 @fputc(i32, %struct._IO_FILE* nocapture) #2
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.ident = !{!0}
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}

View File

@ -1,85 +1,83 @@
#!/bin/sh -a
echo "--> 1. Create LLVM-IR from C"
clang -S -emit-llvm matmul.c -o matmul.s
clang -S -emit-llvm matmul.c -Xclang -disable-O0-optnone -o matmul.ll
echo "--> 2. Prepare the LLVM-IR for Polly"
opt -S -polly-canonicalize matmul.s > matmul.preopt.ll
opt -S -polly-canonicalize matmul.ll -o matmul.preopt.ll
echo "--> 3. Show the SCoPs detected by Polly"
opt -basicaa -polly-ast -analyze -q matmul.preopt.ll \
-polly-process-unprofitable
opt -basicaa -polly-ast -analyze matmul.preopt.ll \
-polly-process-unprofitable -polly-use-llvm-names
echo "--> 4.1 Highlight the detected SCoPs in the CFGs of the program"
# We only create .dot files, as directly -view-scops directly calls graphviz
# which would require user interaction to continue the script.
# opt -basicaa -view-scops -disable-output matmul.preopt.ll
opt -basicaa -dot-scops -disable-output matmul.preopt.ll
opt -basicaa -dot-scops -disable-output matmul.preopt.ll -polly-use-llvm-names
echo "--> 4.2 Highlight the detected SCoPs in the CFGs of the program (print \
no instructions)"
# We only create .dot files, as directly -view-scops-only directly calls
# graphviz which would require user interaction to continue the script.
# opt -basicaa -view-scops-only -disable-output matmul.preopt.ll
opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll
opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll -polly-use-llvm-names
echo "--> 4.3 Create .png files from the .dot files"
for i in `ls *.dot`; do dot -Tpng $i > $i.png; done
echo "--> 5. View the polyhedral representation of the SCoPs"
opt -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
opt -basicaa -polly-scops -analyze matmul.preopt.ll \
-polly-process-unprofitable -polly-use-llvm-names
echo "--> 6. Show the dependences for the SCoPs"
opt -basicaa -polly-dependences -analyze matmul.preopt.ll \
-polly-process-unprofitable
-polly-process-unprofitable -polly-use-llvm-names
echo "--> 7. Export jscop files"
opt -basicaa -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
opt -basicaa -polly-export-jscop matmul.preopt.ll \
-polly-process-unprofitable -disable-output -polly-use-llvm-names
echo "--> 8. Import the updated jscop files and print the new SCoPs. (optional)"
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
-polly-process-unprofitable
-polly-process-unprofitable -polly-use-llvm-names
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
-polly-import-jscop-postfix=interchanged -polly-process-unprofitable
-polly-import-jscop-postfix=interchanged -polly-process-unprofitable -polly-use-llvm-names
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
-polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable
-polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable -polly-use-llvm-names
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
-polly-import-jscop-postfix=interchanged+tiled+vector \
-polly-process-unprofitable
-polly-process-unprofitable -polly-use-llvm-names
echo "--> 9. Codegenerate the SCoPs"
opt -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \
-polly-codegen -polly-process-unprofitable\
matmul.preopt.ll | opt -O3 > matmul.polly.interchanged.ll
opt -basicaa -polly-import-jscop \
opt -S -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \
-polly-codegen -polly-process-unprofitable -polly-use-llvm-names \
matmul.preopt.ll | opt -O3 -S -o matmul.polly.interchanged.ll
opt -S -basicaa -polly-import-jscop \
-polly-import-jscop-postfix=interchanged+tiled -polly-codegen \
matmul.preopt.ll -polly-process-unprofitable \
| opt -O3 > matmul.polly.interchanged+tiled.ll
opt -basicaa -polly-import-jscop -polly-process-unprofitable\
matmul.preopt.ll -polly-process-unprofitable -polly-use-llvm-names \
| opt -O3 -S -o matmul.polly.interchanged+tiled.ll
opt -S -basicaa -polly-import-jscop -polly-process-unprofitable\
-polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
matmul.preopt.ll -polly-vectorizer=polly\
| opt -O3 > matmul.polly.interchanged+tiled+vector.ll
opt -basicaa -polly-import-jscop -polly-process-unprofitable\
matmul.preopt.ll -polly-vectorizer=polly -polly-use-llvm-names \
| opt -O3 -S -o matmul.polly.interchanged+tiled+vector.ll
opt -S -basicaa -polly-import-jscop -polly-process-unprofitable\
-polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
matmul.preopt.ll -polly-vectorizer=polly -polly-parallel\
| opt -O3 > matmul.polly.interchanged+tiled+vector+openmp.ll
opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll
matmul.preopt.ll -polly-vectorizer=polly -polly-parallel -polly-use-llvm-names \
| opt -O3 -S -o matmul.polly.interchanged+tiled+vector+openmp.ll
opt -S matmul.preopt.ll | opt -O3 -S -o matmul.normalopt.ll
echo "--> 10. Create the executables"
llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s && gcc matmul.polly.interchanged.s \
-o matmul.polly.interchanged.exe
llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s && gcc matmul.polly.interchanged+tiled.s \
-o matmul.polly.interchanged+tiled.exe
llc matmul.polly.interchanged+tiled+vector.ll \
-o matmul.polly.interchanged+tiled+vector.s \
&& gcc matmul.polly.interchanged+tiled+vector.s \
-o matmul.polly.interchanged+tiled+vector.exe
llc matmul.polly.interchanged+tiled+vector+openmp.ll \
-o matmul.polly.interchanged+tiled+vector+openmp.s \
&& gcc -lgomp matmul.polly.interchanged+tiled+vector+openmp.s \
-o matmul.polly.interchanged+tiled+vector+openmp.exe
llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s \
-o matmul.normalopt.exe
llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s -relocation-model=pic
gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s -relocation-model=pic
gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s -relocation-model=pic
gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s -relocation-model=pic
gcc matmul.polly.interchanged+tiled+vector+openmp.s -lgomp -o matmul.polly.interchanged+tiled+vector+openmp.exe
llc matmul.normalopt.ll -o matmul.normalopt.s -relocation-model=pic
gcc matmul.normalopt.s -lgomp -o matmul.normalopt.exe
echo "--> 11. Compare the runtime of the executables"

View File

@ -1,39 +1,39 @@
digraph "Scop Graph for 'init_array' function" {
label="Scop Graph for 'init_array' function";
Node0x5b5b5a0 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x5b5b5a0 -> Node0x5b5de30;
Node0x5b5de30 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
Node0x5b5de30 -> Node0x5b5de50;
Node0x5b5de50 [shape=record,label="{for.cond1.preheader: \l %indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]\l br label %for.body3\l}"];
Node0x5b5de50 -> Node0x5b5b570;
Node0x5b5b570 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %1 = trunc i64 %0 to i32\l %rem = srem i32 %1, 1024\l %add = add nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %3 = trunc i64 %2 to i32\l %rem8 = srem i32 %3, 1024\l %add9 = add nsw i32 %rem8, 1\l %conv10 = sitofp i32 %add9 to double\l %div11 = fmul double %conv10, 5.000000e-01\l %conv12 = fptrunc double %div11 to float\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv12, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"];
Node0x5b5b570 -> Node0x5b5b570[constraint=false];
Node0x5b5b570 -> Node0x5b5df30;
Node0x5b5df30 [shape=record,label="{for.inc17: \l %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1\l %exitcond7 = icmp ne i64 %indvars.iv.next6, 1536\l br i1 %exitcond7, label %for.cond1.preheader, label %for.end19\l}"];
Node0x5b5df30 -> Node0x5b5de50[constraint=false];
Node0x5b5df30 -> Node0x5b5df90;
Node0x5b5df90 [shape=record,label="{for.end19: \l ret void\l}"];
Node0x7fffc6c46ea0 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x7fffc6c46ea0 -> Node0x7fffc6c46f20;
Node0x7fffc6c46f20 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
Node0x7fffc6c46f20 -> Node0x7fffc6c47000;
Node0x7fffc6c47000 [shape=record,label="{for.cond1.preheader: \l %indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.inc17 ]\l br label %for.body3\l}"];
Node0x7fffc6c47000 -> Node0x7fffc6c47290;
Node0x7fffc6c47290 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv4\l %1 = trunc i64 %0 to i32\l %rem = and i32 %1, 1023\l %add = add nuw nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv4, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv\l store float %conv4, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"];
Node0x7fffc6c47290 -> Node0x7fffc6c47290[constraint=false];
Node0x7fffc6c47290 -> Node0x7fffc6c47b10;
Node0x7fffc6c47b10 [shape=record,label="{for.inc17: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.cond1.preheader, label %for.end19\l}"];
Node0x7fffc6c47b10 -> Node0x7fffc6c47000[constraint=false];
Node0x7fffc6c47b10 -> Node0x7fffc6c48b10;
Node0x7fffc6c48b10 [shape=record,label="{for.end19: \l ret void\l}"];
colorscheme = "paired12"
subgraph cluster_0x5b4bdd0 {
subgraph cluster_0x7fffc6c32540 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5b4bf50 {
subgraph cluster_0x7fffc6c32f30 {
label = "Region can not profitably be optimized!";
style = solid;
color = 6
subgraph cluster_0x5b4c0d0 {
subgraph cluster_0x7fffc6c32690 {
label = "";
style = solid;
color = 5
Node0x5b5b570;
Node0x7fffc6c47290;
}
Node0x5b5de50;
Node0x5b5df30;
Node0x7fffc6c47000;
Node0x7fffc6c47b10;
}
Node0x5b5b5a0;
Node0x5b5de30;
Node0x5b5df90;
Node0x7fffc6c46ea0;
Node0x7fffc6c46f20;
Node0x7fffc6c48b10;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

After

Width:  |  Height:  |  Size: 123 KiB

View File

@ -1,50 +1,50 @@
digraph "Scop Graph for 'main' function" {
label="Scop Graph for 'main' function";
Node0x5b5c850 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x5b5c850 -> Node0x5b5a440;
Node0x5b5a440 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"];
Node0x5b5a440 -> Node0x5b38cd0;
Node0x5b38cd0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"];
Node0x5b38cd0 -> Node0x5b4bd30;
Node0x5b4bd30 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"];
Node0x5b4bd30 -> Node0x5b38c50;
Node0x5b38c50 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l %0 = load float, float* %arrayidx12, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l %arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float %add, float* %arrayidx24, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"];
Node0x5b38c50 -> Node0x5b38c50[constraint=false];
Node0x5b38c50 -> Node0x5b5a290;
Node0x5b5a290 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"];
Node0x5b5a290 -> Node0x5b4bd30[constraint=false];
Node0x5b5a290 -> Node0x5b5a340;
Node0x5b5a340 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"];
Node0x5b5a340 -> Node0x5b38cd0[constraint=false];
Node0x5b5a340 -> Node0x5b5a3a0;
Node0x5b5a3a0 [shape=record,label="{for.end30: \l ret i32 0\l}"];
Node0x7fffc6c4cb90 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x7fffc6c4cb90 -> Node0x7fffc6c47b10;
Node0x7fffc6c47b10 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"];
Node0x7fffc6c47b10 -> Node0x7fffc6c456e0;
Node0x7fffc6c456e0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"];
Node0x7fffc6c456e0 -> Node0x7fffc6c3f080;
Node0x7fffc6c3f080 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"];
Node0x7fffc6c3f080 -> Node0x7fffc6c3f220;
Node0x7fffc6c3f220 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %0 = load float, float* %arrayidx5, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l store float %add, float* %arrayidx5, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"];
Node0x7fffc6c3f220 -> Node0x7fffc6c3f220[constraint=false];
Node0x7fffc6c3f220 -> Node0x7fffc6c40480;
Node0x7fffc6c40480 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"];
Node0x7fffc6c40480 -> Node0x7fffc6c3f080[constraint=false];
Node0x7fffc6c40480 -> Node0x7fffc6c404e0;
Node0x7fffc6c404e0 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"];
Node0x7fffc6c404e0 -> Node0x7fffc6c456e0[constraint=false];
Node0x7fffc6c404e0 -> Node0x7fffc6c40540;
Node0x7fffc6c40540 [shape=record,label="{for.end30: \l ret i32 0\l}"];
colorscheme = "paired12"
subgraph cluster_0x5b5c970 {
subgraph cluster_0x7fffc6c32540 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5b5c5a0 {
subgraph cluster_0x7fffc6c32f30 {
label = "";
style = filled;
color = 3 subgraph cluster_0x5b5c9f0 {
color = 3 subgraph cluster_0x7fffc6c32690 {
label = "";
style = solid;
color = 5
subgraph cluster_0x5b5c110 {
subgraph cluster_0x7fffc6c32dc0 {
label = "";
style = solid;
color = 7
Node0x5b38c50;
Node0x7fffc6c3f220;
}
Node0x5b4bd30;
Node0x5b5a290;
Node0x7fffc6c3f080;
Node0x7fffc6c40480;
}
Node0x5b38cd0;
Node0x5b5a340;
Node0x7fffc6c456e0;
Node0x7fffc6c404e0;
}
Node0x5b5c850;
Node0x5b5a440;
Node0x5b5a3a0;
Node0x7fffc6c4cb90;
Node0x7fffc6c47b10;
Node0x7fffc6c40540;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 186 KiB

After

Width:  |  Height:  |  Size: 175 KiB

View File

@ -1,51 +1,51 @@
digraph "Scop Graph for 'print_array' function" {
label="Scop Graph for 'print_array' function";
Node0x5b5ee00 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x5b5ee00 -> Node0x5b5ee50;
Node0x5b5ee50 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
Node0x5b5ee50 -> Node0x5b5ee70;
Node0x5b5ee70 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"];
Node0x5b5ee70 -> Node0x5b5ee20;
Node0x5b5ee20 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = srem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"];
Node0x5b5ee20 -> Node0x5b60d10;
Node0x5b5ee20 -> Node0x5b60d70;
Node0x5b60d10 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"];
Node0x5b60d10 -> Node0x5b60d70;
Node0x5b60d70 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
Node0x5b60d70 -> Node0x5b5ee20[constraint=false];
Node0x5b60d70 -> Node0x5b60e10;
Node0x5b60e10 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"];
Node0x5b60e10 -> Node0x5b5ee70[constraint=false];
Node0x5b60e10 -> Node0x5b60e70;
Node0x5b60e70 [shape=record,label="{for.end12: \l ret void\l}"];
Node0x7fffc6c42bf0 [shape=record,label="{entry:\l br label %entry.split\l}"];
Node0x7fffc6c42bf0 -> Node0x7fffc6c42f10;
Node0x7fffc6c42f10 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
Node0x7fffc6c42f10 -> Node0x7fffc6c4abb0;
Node0x7fffc6c4abb0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"];
Node0x7fffc6c4abb0 -> Node0x7fffc6c4ac10;
Node0x7fffc6c4ac10 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = urem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"];
Node0x7fffc6c4ac10 -> Node0x7fffc6c4af80;
Node0x7fffc6c4ac10 -> Node0x7fffc6c4afe0;
Node0x7fffc6c4af80 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"];
Node0x7fffc6c4af80 -> Node0x7fffc6c4afe0;
Node0x7fffc6c4afe0 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
Node0x7fffc6c4afe0 -> Node0x7fffc6c4ac10[constraint=false];
Node0x7fffc6c4afe0 -> Node0x7fffc6c4b3b0;
Node0x7fffc6c4b3b0 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"];
Node0x7fffc6c4b3b0 -> Node0x7fffc6c4abb0[constraint=false];
Node0x7fffc6c4b3b0 -> Node0x7fffc6c4b580;
Node0x7fffc6c4b580 [shape=record,label="{for.end12: \l ret void\l}"];
colorscheme = "paired12"
subgraph cluster_0x5b349a0 {
subgraph cluster_0x7fffc6c32540 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5b5c2c0 {
subgraph cluster_0x7fffc6c32dc0 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 6
subgraph cluster_0x5b5c240 {
subgraph cluster_0x7fffc6c32690 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 5
subgraph cluster_0x5b34a20 {
subgraph cluster_0x7fffc6c32f30 {
label = "Region can not profitably be optimized!";
style = solid;
color = 7
Node0x5b5ee20;
Node0x5b60d10;
Node0x7fffc6c4ac10;
Node0x7fffc6c4af80;
}
Node0x5b60d70;
Node0x7fffc6c4afe0;
}
Node0x5b5ee70;
Node0x5b60e10;
Node0x7fffc6c4abb0;
Node0x7fffc6c4b3b0;
}
Node0x5b5ee00;
Node0x5b5ee50;
Node0x5b60e70;
Node0x7fffc6c42bf0;
Node0x7fffc6c42f10;
Node0x7fffc6c4b580;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 196 KiB

After

Width:  |  Height:  |  Size: 205 KiB

View File

@ -1,39 +1,39 @@
digraph "Scop Graph for 'init_array' function" {
label="Scop Graph for 'init_array' function";
Node0x5ae2570 [shape=record,label="{entry}"];
Node0x5ae2570 -> Node0x5ae4e90;
Node0x5ae4e90 [shape=record,label="{entry.split}"];
Node0x5ae4e90 -> Node0x5ae4f50;
Node0x5ae4f50 [shape=record,label="{for.cond1.preheader}"];
Node0x5ae4f50 -> Node0x5ae50e0;
Node0x5ae50e0 [shape=record,label="{for.body3}"];
Node0x5ae50e0 -> Node0x5ae50e0[constraint=false];
Node0x5ae50e0 -> Node0x5ae5100;
Node0x5ae5100 [shape=record,label="{for.inc17}"];
Node0x5ae5100 -> Node0x5ae4f50[constraint=false];
Node0x5ae5100 -> Node0x5ae4ff0;
Node0x5ae4ff0 [shape=record,label="{for.end19}"];
Node0x7fffdb5cceb0 [shape=record,label="{entry}"];
Node0x7fffdb5cceb0 -> Node0x7fffdb5ccf00;
Node0x7fffdb5ccf00 [shape=record,label="{entry.split}"];
Node0x7fffdb5ccf00 -> Node0x7fffdb5ccf80;
Node0x7fffdb5ccf80 [shape=record,label="{for.cond1.preheader}"];
Node0x7fffdb5ccf80 -> Node0x7fffdb5cd090;
Node0x7fffdb5cd090 [shape=record,label="{for.body3}"];
Node0x7fffdb5cd090 -> Node0x7fffdb5cd090[constraint=false];
Node0x7fffdb5cd090 -> Node0x7fffdb5cd0b0;
Node0x7fffdb5cd0b0 [shape=record,label="{for.inc17}"];
Node0x7fffdb5cd0b0 -> Node0x7fffdb5ccf80[constraint=false];
Node0x7fffdb5cd0b0 -> Node0x7fffdb5cd2a0;
Node0x7fffdb5cd2a0 [shape=record,label="{for.end19}"];
colorscheme = "paired12"
subgraph cluster_0x5ad2dd0 {
subgraph cluster_0x7fffdb5b8530 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5ad2f50 {
subgraph cluster_0x7fffdb5b8f40 {
label = "Region can not profitably be optimized!";
style = solid;
color = 6
subgraph cluster_0x5ad30d0 {
subgraph cluster_0x7fffdb5b86a0 {
label = "";
style = solid;
color = 5
Node0x5ae50e0;
Node0x7fffdb5cd090;
}
Node0x5ae4f50;
Node0x5ae5100;
Node0x7fffdb5ccf80;
Node0x7fffdb5cd0b0;
}
Node0x5ae2570;
Node0x5ae4e90;
Node0x5ae4ff0;
Node0x7fffdb5cceb0;
Node0x7fffdb5ccf00;
Node0x7fffdb5cd2a0;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -1,50 +1,50 @@
digraph "Scop Graph for 'main' function" {
label="Scop Graph for 'main' function";
Node0x5abfcf0 [shape=record,label="{entry}"];
Node0x5abfcf0 -> Node0x5ade060;
Node0x5ade060 [shape=record,label="{entry.split}"];
Node0x5ade060 -> Node0x5ade0e0;
Node0x5ade0e0 [shape=record,label="{for.cond1.preheader}"];
Node0x5ade0e0 -> Node0x5ade100;
Node0x5ade100 [shape=record,label="{for.body3}"];
Node0x5ade100 -> Node0x5ae0020;
Node0x5ae0020 [shape=record,label="{for.body8}"];
Node0x5ae0020 -> Node0x5ae0020[constraint=false];
Node0x5ae0020 -> Node0x5ae0080;
Node0x5ae0080 [shape=record,label="{for.inc25}"];
Node0x5ae0080 -> Node0x5ade100[constraint=false];
Node0x5ae0080 -> Node0x5adfef0;
Node0x5adfef0 [shape=record,label="{for.inc28}"];
Node0x5adfef0 -> Node0x5ade0e0[constraint=false];
Node0x5adfef0 -> Node0x5adff50;
Node0x5adff50 [shape=record,label="{for.end30}"];
Node0x7fffdb5cbd10 [shape=record,label="{entry}"];
Node0x7fffdb5cbd10 -> Node0x7fffdb5c7140;
Node0x7fffdb5c7140 [shape=record,label="{entry.split}"];
Node0x7fffdb5c7140 -> Node0x7fffdb5c7200;
Node0x7fffdb5c7200 [shape=record,label="{for.cond1.preheader}"];
Node0x7fffdb5c7200 -> Node0x7fffdb5ccd60;
Node0x7fffdb5ccd60 [shape=record,label="{for.body3}"];
Node0x7fffdb5ccd60 -> Node0x7fffdb5ccd80;
Node0x7fffdb5ccd80 [shape=record,label="{for.body8}"];
Node0x7fffdb5ccd80 -> Node0x7fffdb5ccd80[constraint=false];
Node0x7fffdb5ccd80 -> Node0x7fffdb5cce20;
Node0x7fffdb5cce20 [shape=record,label="{for.inc25}"];
Node0x7fffdb5cce20 -> Node0x7fffdb5ccd60[constraint=false];
Node0x7fffdb5cce20 -> Node0x7fffdb5cce80;
Node0x7fffdb5cce80 [shape=record,label="{for.inc28}"];
Node0x7fffdb5cce80 -> Node0x7fffdb5c7200[constraint=false];
Node0x7fffdb5cce80 -> Node0x7fffdb5ccee0;
Node0x7fffdb5ccee0 [shape=record,label="{for.end30}"];
colorscheme = "paired12"
subgraph cluster_0x5ad2c80 {
subgraph cluster_0x7fffdb5b8530 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5ad2e50 {
subgraph cluster_0x7fffdb5b8f40 {
label = "";
style = filled;
color = 3 subgraph cluster_0x5ad2d00 {
color = 3 subgraph cluster_0x7fffdb5b86a0 {
label = "";
style = solid;
color = 5
subgraph cluster_0x5ad2dd0 {
subgraph cluster_0x7fffdb5cc3c0 {
label = "";
style = solid;
color = 7
Node0x5ae0020;
Node0x7fffdb5ccd80;
}
Node0x5ade100;
Node0x5ae0080;
Node0x7fffdb5ccd60;
Node0x7fffdb5cce20;
}
Node0x5ade0e0;
Node0x5adfef0;
Node0x7fffdb5c7200;
Node0x7fffdb5cce80;
}
Node0x5abfcf0;
Node0x5ade060;
Node0x5adff50;
Node0x7fffdb5cbd10;
Node0x7fffdb5c7140;
Node0x7fffdb5ccee0;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

After

Width:  |  Height:  |  Size: 34 KiB

View File

@ -1,51 +1,51 @@
digraph "Scop Graph for 'print_array' function" {
label="Scop Graph for 'print_array' function";
Node0x5ae5e30 [shape=record,label="{entry}"];
Node0x5ae5e30 -> Node0x5ae5f50;
Node0x5ae5f50 [shape=record,label="{entry.split}"];
Node0x5ae5f50 -> Node0x5ae7d90;
Node0x5ae7d90 [shape=record,label="{for.cond1.preheader}"];
Node0x5ae7d90 -> Node0x5ae7f20;
Node0x5ae7f20 [shape=record,label="{for.body3}"];
Node0x5ae7f20 -> Node0x5ae7f40;
Node0x5ae7f20 -> Node0x5ae7f60;
Node0x5ae7f40 [shape=record,label="{if.then}"];
Node0x5ae7f40 -> Node0x5ae7f60;
Node0x5ae7f60 [shape=record,label="{for.inc}"];
Node0x5ae7f60 -> Node0x5ae7f20[constraint=false];
Node0x5ae7f60 -> Node0x5ae7e30;
Node0x5ae7e30 [shape=record,label="{for.end}"];
Node0x5ae7e30 -> Node0x5ae7d90[constraint=false];
Node0x5ae7e30 -> Node0x5ae8110;
Node0x5ae8110 [shape=record,label="{for.end12}"];
Node0x7fffdb5c9180 [shape=record,label="{entry}"];
Node0x7fffdb5c9180 -> Node0x7fffdb5b7940;
Node0x7fffdb5b7940 [shape=record,label="{entry.split}"];
Node0x7fffdb5b7940 -> Node0x7fffdb5b7960;
Node0x7fffdb5b7960 [shape=record,label="{for.cond1.preheader}"];
Node0x7fffdb5b7960 -> Node0x7fffdb5b79c0;
Node0x7fffdb5b79c0 [shape=record,label="{for.body3}"];
Node0x7fffdb5b79c0 -> Node0x7fffdb5b79e0;
Node0x7fffdb5b79c0 -> Node0x7fffdb5b7a80;
Node0x7fffdb5b79e0 [shape=record,label="{if.then}"];
Node0x7fffdb5b79e0 -> Node0x7fffdb5b7a80;
Node0x7fffdb5b7a80 [shape=record,label="{for.inc}"];
Node0x7fffdb5b7a80 -> Node0x7fffdb5b79c0[constraint=false];
Node0x7fffdb5b7a80 -> Node0x7fffdb5b7ae0;
Node0x7fffdb5b7ae0 [shape=record,label="{for.end}"];
Node0x7fffdb5b7ae0 -> Node0x7fffdb5b7960[constraint=false];
Node0x7fffdb5b7ae0 -> Node0x7fffdb5b7b40;
Node0x7fffdb5b7b40 [shape=record,label="{for.end12}"];
colorscheme = "paired12"
subgraph cluster_0x5abb9a0 {
subgraph cluster_0x7fffdb5b8530 {
label = "";
style = solid;
color = 1
subgraph cluster_0x5ae32c0 {
subgraph cluster_0x7fffdb5cc3c0 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 6
subgraph cluster_0x5ae3240 {
subgraph cluster_0x7fffdb5b86a0 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 5
subgraph cluster_0x5abba20 {
subgraph cluster_0x7fffdb5b8f40 {
label = "Region can not profitably be optimized!";
style = solid;
color = 7
Node0x5ae7f20;
Node0x5ae7f40;
Node0x7fffdb5b79c0;
Node0x7fffdb5b79e0;
}
Node0x5ae7f60;
Node0x7fffdb5b7a80;
}
Node0x5ae7d90;
Node0x5ae7e30;
Node0x7fffdb5b7960;
Node0x7fffdb5b7ae0;
}
Node0x5ae5e30;
Node0x5ae5f50;
Node0x5ae8110;
Node0x7fffdb5c9180;
Node0x7fffdb5b7940;
Node0x7fffdb5b7b40;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

After

Width:  |  Height:  |  Size: 76 KiB