[doc] Fix HowToManuallyUseTheIndividualPiecesOfPolly
Also remove compiled binaries. llvm-svn: 343119
@ -21,7 +21,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
clang -S -emit-llvm matmul.c -o matmul.s
|
||||
clang -S -emit-llvm matmul.c -Xclang -disable-O0-optnone -o matmul.ll
|
||||
|
||||
|
||||
2. **Prepare the LLVM-IR for Polly**
|
||||
@ -34,7 +34,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
opt -S -polly-canonicalize matmul.s > matmul.preopt.ll
|
||||
opt -S -polly-canonicalize matmul.ll -o matmul.preopt.ll
|
||||
|
||||
3. **Show the SCoPs detected by Polly (optional)**
|
||||
--------------------------------------------------
|
||||
@ -45,7 +45,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt -polly-ast -analyze -q matmul.preopt.ll -polly-process-unprofitable
|
||||
$ opt -basicaa -polly-ast -analyze matmul.preopt.ll -polly-process-unprofitable -polly-use-llvm-names
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -84,8 +84,8 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt -view-scops -disable-output matmul.preopt.ll
|
||||
$ opt -view-scops-only -disable-output matmul.preopt.ll
|
||||
$ opt -polly-use-llvm-names -basicaa -view-scops -disable-output matmul.preopt.ll
|
||||
$ opt -polly-use-llvm-names -basicaa -view-scops-only -disable-output matmul.preopt.ll
|
||||
|
||||
The output for the different functions:
|
||||
|
||||
@ -104,7 +104,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
|
||||
$ opt -polly-use-llvm-names -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -194,7 +194,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt -polly-dependences -analyze matmul.preopt.ll -polly-process-unprofitable
|
||||
$ opt -basicaa -polly-use-llvm-names -polly-dependences -analyze matmul.preopt.ll -polly-process-unprofitable
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -226,7 +226,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
|
||||
$ opt -basicaa -polly-use-llvm-names -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -254,7 +254,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-ast -analyze -polly-process-unprofitable
|
||||
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-ast -analyze -polly-process-unprofitable
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
@ -282,7 +282,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-ast -analyze -polly-process-unprofitable
|
||||
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-ast -analyze -polly-process-unprofitable
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
@ -311,7 +311,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
|
||||
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
@ -346,7 +346,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
|
||||
$ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-ast -analyze -polly-process-unprofitable
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
@ -383,11 +383,11 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll
|
||||
$ opt -S matmul.preopt.ll | opt -S -O3 -o matmul.normalopt.ll
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-codegen -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged.ll
|
||||
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-codegen -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged.ll
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -397,7 +397,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-codegen -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled.ll
|
||||
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-codegen -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled.ll
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -407,7 +407,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled+vector.ll
|
||||
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled+vector.ll
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -417,7 +417,7 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-parallel -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled+openmp.ll
|
||||
$ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-parallel -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled+vector+openmp.ll
|
||||
|
||||
.. code-block:: guess
|
||||
|
||||
@ -431,11 +431,16 @@ performance improvement can be expected by an optimal automatic optimizer.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s -o matmul.normalopt.exe
|
||||
$ llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s && gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
|
||||
$ llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s && gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
|
||||
$ llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s && gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
|
||||
$ llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s && gcc -fopenmp matmul.polly.interchanged+tiled+vector+openmp.s -o matmul.polly.interchanged+tiled+vector+openmp.exe
|
||||
$ llc matmul.normalopt.ll -o matmul.normalopt.s -relocation-model=pic
|
||||
$ gcc matmul.normalopt.s -o matmul.normalopt.exe
|
||||
$ llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s -relocation-model=pic
|
||||
$ gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
|
||||
$ llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s -relocation-model=pic
|
||||
$ gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
|
||||
$ llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s -relocation-model=pic
|
||||
$ gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
|
||||
$ llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s -relocation-model=pic
|
||||
$ gcc matmul.polly.interchanged+tiled+vector+openmp.s -lgomp -o matmul.polly.interchanged+tiled+vector+openmp.exe
|
||||
|
||||
11. **Compare the runtime of the executables**
|
||||
----------------------------------------------
|
||||
|
@ -1,33 +1,39 @@
|
||||
{
|
||||
"arrays" : [
|
||||
"arrays": [
|
||||
{
|
||||
"name" : "MemRef_A",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_B",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context" : "{ : }",
|
||||
"name" : "%for.cond1.preheader---%for.end19",
|
||||
"statements" : [
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end19",
|
||||
"statements": [
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
{
|
||||
"arrays": [
|
||||
{
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end19",
|
||||
"statements": [
|
||||
{
|
||||
"accesses": [
|
||||
{
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
{
|
||||
"arrays": [
|
||||
{
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end19",
|
||||
"statements": [
|
||||
{
|
||||
"accesses": [
|
||||
{
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
{
|
||||
"arrays": [
|
||||
{
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end19",
|
||||
"statements": [
|
||||
{
|
||||
"accesses": [
|
||||
{
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
|
||||
}
|
||||
]
|
||||
}
|
@ -1,57 +1,66 @@
|
||||
{
|
||||
"arrays" : [
|
||||
"arrays": [
|
||||
{
|
||||
"name" : "MemRef_C",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_C",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_A",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_B",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context" : "{ : }",
|
||||
"name" : "%for.cond1.preheader---%for.end30",
|
||||
"statements" : [
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end30",
|
||||
"statements": [
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "{ Stmt_for_body3[i0, i1] -> [i0, i1, 0, 0] }"
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1, 0, 0] }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name" : "Stmt_for_body8",
|
||||
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [i0, i1, 1, i2] }"
|
||||
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name": "Stmt_for_body8",
|
||||
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [i0, i1, 1, i2] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -1,57 +1,66 @@
|
||||
{
|
||||
"arrays" : [
|
||||
"arrays": [
|
||||
{
|
||||
"name" : "MemRef_C",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_C",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_A",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_B",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context" : "{ : }",
|
||||
"name" : "%for.cond1.preheader---%for.end30",
|
||||
"statements" : [
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end30",
|
||||
"statements": [
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0] }"
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0] }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name" : "Stmt_for_body8",
|
||||
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, i0, i2, i1] }"
|
||||
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name": "Stmt_for_body8",
|
||||
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, i0, i2, i1] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -1,57 +1,66 @@
|
||||
{
|
||||
"arrays" : [
|
||||
"arrays": [
|
||||
{
|
||||
"name" : "MemRef_C",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_C",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_A",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_B",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context" : "{ : }",
|
||||
"name" : "%for.cond1.preheader---%for.end30",
|
||||
"statements" : [
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end30",
|
||||
"statements": [
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0 ] }"
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0 ] }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name" : "Stmt_for_body8",
|
||||
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, i1]: o0 <= i0 < o0 + 64 and o1 <= i1 < o1 + 64 and o2 <= i2 < o2 + 64 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 }"
|
||||
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name": "Stmt_for_body8",
|
||||
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, i1]: o0 <= i0 < o0 + 64 and o1 <= i1 < o1 + 64 and o2 <= i2 < o2 + 64 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 }"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -1,57 +1,66 @@
|
||||
{
|
||||
"arrays" : [
|
||||
"arrays": [
|
||||
{
|
||||
"name" : "MemRef_C",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_C",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_A",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_A",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name" : "MemRef_B",
|
||||
"sizes" : [ "1536" ],
|
||||
"type" : "float"
|
||||
"name": "MemRef_B",
|
||||
"sizes": [
|
||||
"*",
|
||||
"1536"
|
||||
],
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"context" : "{ : }",
|
||||
"name" : "%for.cond1.preheader---%for.end30",
|
||||
"statements" : [
|
||||
"context": "{ : }",
|
||||
"name": "%for.cond1.preheader---%for.end30",
|
||||
"statements": [
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0, 0 ] }"
|
||||
"domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
|
||||
"name": "Stmt_for_body3",
|
||||
"schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0, 0 ] }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
"accesses": [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
"kind": "read",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
"kind": "write",
|
||||
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name" : "Stmt_for_body8",
|
||||
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, oo1, i1]: o0 <= i0 < o0 + 64 and o1 <= oo1 < o1 + 64 and o2 <= i2 < o2 + 64 and oo1 <= i1 < oo1 + 4 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 and oo1 % 4 = 0 }"
|
||||
"domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
|
||||
"name": "Stmt_for_body8",
|
||||
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, oo1, i1]: o0 <= i0 < o0 + 64 and o1 <= oo1 < o1 + 64 and o2 <= i2 < o2 + 64 and oo1 <= i1 < oo1 + 4 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 and oo1 % 4 = 0 }"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -6,15 +6,15 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||||
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
|
||||
|
||||
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@stdout = external global %struct._IO_FILE*, align 8
|
||||
@A = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@B = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@stdout = external dso_local global %struct._IO_FILE*, align 8
|
||||
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
|
||||
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@C = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @init_array() #0 {
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local void @init_array() #0 {
|
||||
entry:
|
||||
%i = alloca i32, align 4
|
||||
%j = alloca i32, align 4
|
||||
@ -44,12 +44,12 @@ for.body3: ; preds = %for.cond1
|
||||
%conv = sitofp i32 %add to double
|
||||
%div = fdiv double %conv, 2.000000e+00
|
||||
%conv4 = fptrunc double %div to float
|
||||
%4 = load i32, i32* %j, align 4
|
||||
%4 = load i32, i32* %i, align 4
|
||||
%idxprom = sext i32 %4 to i64
|
||||
%5 = load i32, i32* %i, align 4
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom
|
||||
%5 = load i32, i32* %j, align 4
|
||||
%idxprom5 = sext i32 %5 to i64
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom5
|
||||
%arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
|
||||
%arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom5
|
||||
store float %conv4, float* %arrayidx6, align 4
|
||||
%6 = load i32, i32* %i, align 4
|
||||
%7 = load i32, i32* %j, align 4
|
||||
@ -59,12 +59,12 @@ for.body3: ; preds = %for.cond1
|
||||
%conv10 = sitofp i32 %add9 to double
|
||||
%div11 = fdiv double %conv10, 2.000000e+00
|
||||
%conv12 = fptrunc double %div11 to float
|
||||
%8 = load i32, i32* %j, align 4
|
||||
%8 = load i32, i32* %i, align 4
|
||||
%idxprom13 = sext i32 %8 to i64
|
||||
%9 = load i32, i32* %i, align 4
|
||||
%idxprom14 = sext i32 %9 to i64
|
||||
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom14
|
||||
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
|
||||
%arrayidx14 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom13
|
||||
%9 = load i32, i32* %j, align 4
|
||||
%idxprom15 = sext i32 %9 to i64
|
||||
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx14, i64 0, i64 %idxprom15
|
||||
store float %conv12, float* %arrayidx16, align 4
|
||||
br label %for.inc
|
||||
|
||||
@ -87,8 +87,8 @@ for.end19: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @print_array() #0 {
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local void @print_array() #0 {
|
||||
entry:
|
||||
%i = alloca i32, align 4
|
||||
%j = alloca i32, align 4
|
||||
@ -111,12 +111,12 @@ for.cond1: ; preds = %for.inc, %for.body
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
%2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
|
||||
%3 = load i32, i32* %j, align 4
|
||||
%3 = load i32, i32* %i, align 4
|
||||
%idxprom = sext i32 %3 to i64
|
||||
%4 = load i32, i32* %i, align 4
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom
|
||||
%4 = load i32, i32* %j, align 4
|
||||
%idxprom4 = sext i32 %4 to i64
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
|
||||
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
|
||||
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom4
|
||||
%5 = load float, float* %arrayidx5, align 4
|
||||
%conv = fpext float %5 to double
|
||||
%call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
|
||||
@ -154,10 +154,10 @@ for.end12: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
|
||||
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @main() #0 {
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local i32 @main() #0 {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
@ -185,12 +185,12 @@ for.cond1: ; preds = %for.inc25, %for.bod
|
||||
br i1 %cmp2, label %for.body3, label %for.end27
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
%2 = load i32, i32* %j, align 4
|
||||
%2 = load i32, i32* %i, align 4
|
||||
%idxprom = sext i32 %2 to i64
|
||||
%3 = load i32, i32* %i, align 4
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom
|
||||
%3 = load i32, i32* %j, align 4
|
||||
%idxprom4 = sext i32 %3 to i64
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
|
||||
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
|
||||
%arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom4
|
||||
store float 0.000000e+00, float* %arrayidx5, align 4
|
||||
store i32 0, i32* %k, align 4
|
||||
br label %for.cond6
|
||||
@ -201,35 +201,35 @@ for.cond6: ; preds = %for.inc, %for.body3
|
||||
br i1 %cmp7, label %for.body8, label %for.end
|
||||
|
||||
for.body8: ; preds = %for.cond6
|
||||
%5 = load i32, i32* %j, align 4
|
||||
%5 = load i32, i32* %i, align 4
|
||||
%idxprom9 = sext i32 %5 to i64
|
||||
%6 = load i32, i32* %i, align 4
|
||||
%idxprom10 = sext i32 %6 to i64
|
||||
%arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom10
|
||||
%arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i64 0, i64 %idxprom9
|
||||
%arrayidx10 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom9
|
||||
%6 = load i32, i32* %j, align 4
|
||||
%idxprom11 = sext i32 %6 to i64
|
||||
%arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx10, i64 0, i64 %idxprom11
|
||||
%7 = load float, float* %arrayidx12, align 4
|
||||
%8 = load i32, i32* %k, align 4
|
||||
%8 = load i32, i32* %i, align 4
|
||||
%idxprom13 = sext i32 %8 to i64
|
||||
%9 = load i32, i32* %i, align 4
|
||||
%idxprom14 = sext i32 %9 to i64
|
||||
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom14
|
||||
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
|
||||
%arrayidx14 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom13
|
||||
%9 = load i32, i32* %k, align 4
|
||||
%idxprom15 = sext i32 %9 to i64
|
||||
%arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx14, i64 0, i64 %idxprom15
|
||||
%10 = load float, float* %arrayidx16, align 4
|
||||
%11 = load i32, i32* %j, align 4
|
||||
%11 = load i32, i32* %k, align 4
|
||||
%idxprom17 = sext i32 %11 to i64
|
||||
%12 = load i32, i32* %k, align 4
|
||||
%idxprom18 = sext i32 %12 to i64
|
||||
%arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom18
|
||||
%arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i64 0, i64 %idxprom17
|
||||
%arrayidx18 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom17
|
||||
%12 = load i32, i32* %j, align 4
|
||||
%idxprom19 = sext i32 %12 to i64
|
||||
%arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx18, i64 0, i64 %idxprom19
|
||||
%13 = load float, float* %arrayidx20, align 4
|
||||
%mul = fmul float %10, %13
|
||||
%add = fadd float %7, %mul
|
||||
%14 = load i32, i32* %j, align 4
|
||||
%14 = load i32, i32* %i, align 4
|
||||
%idxprom21 = sext i32 %14 to i64
|
||||
%15 = load i32, i32* %i, align 4
|
||||
%idxprom22 = sext i32 %15 to i64
|
||||
%arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom22
|
||||
%arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i64 0, i64 %idxprom21
|
||||
%arrayidx22 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom21
|
||||
%15 = load i32, i32* %j, align 4
|
||||
%idxprom23 = sext i32 %15 to i64
|
||||
%arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx22, i64 0, i64 %idxprom23
|
||||
store float %add, float* %arrayidx24, align 4
|
||||
br label %for.inc
|
||||
|
||||
@ -261,9 +261,11 @@ for.end30: ; preds = %for.cond
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.ident = !{!1}
|
||||
|
||||
!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
|
@ -1,263 +1,235 @@
|
||||
.file "matmul.normalopt.ll"
|
||||
.text
|
||||
.file "matmul.c"
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.p2align 3 # -- Begin function init_array
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.p2align 4, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
leaq B(%rip), %rax
|
||||
leaq A(%rip), %rcx
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
|
||||
xorl %r9d, %r9d
|
||||
.p2align 4, 0x90
|
||||
.LBB0_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
movl $1, %edi
|
||||
xorl %edx, %edx
|
||||
.p2align 4, 0x90
|
||||
.LBB0_2: # %for.body3
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
andl $1022, %esi # imm = 0x3FE
|
||||
orl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, -4(%rcx,%rdi,4)
|
||||
movss %xmm1, -4(%rax,%rdi,4)
|
||||
leal (%r9,%rdx), %esi
|
||||
andl $1023, %esi # imm = 0x3FF
|
||||
addl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, (%rcx,%rdi,4)
|
||||
movss %xmm1, (%rax,%rdi,4)
|
||||
addq $2, %rdi
|
||||
addl %r8d, %edx
|
||||
cmpq $1537, %rdi # imm = 0x601
|
||||
jne .LBB0_2
|
||||
# BB#3: # %for.inc17
|
||||
# %bb.3: # %for.inc17
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
addq $1, %r9
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
addq $6144, %rcx # imm = 0x1800
|
||||
addl $2, %r8d
|
||||
cmpq $1536, %r9 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4: # %for.end19
|
||||
# %bb.4: # %for.end19
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end0:
|
||||
.size init_array, .Lfunc_end0-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
# -- End function
|
||||
.globl print_array # -- Begin function print_array
|
||||
.p2align 4, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
pushq %rax
|
||||
.cfi_offset %rbx, -56
|
||||
.cfi_offset %r12, -48
|
||||
.cfi_offset %r13, -40
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
leaq C(%rip), %r13
|
||||
xorl %eax, %eax
|
||||
movl $3435973837, %r12d # imm = 0xCCCCCCCD
|
||||
leaq .L.str(%rip), %r14
|
||||
.p2align 4, 0x90
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
movq stdout(%rip), %rsi
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.p2align 4, 0x90
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movl %ebx, %eax
|
||||
imulq %r12, %rax
|
||||
shrq $38, %rax
|
||||
leal (%rax,%rax,4), %r15d
|
||||
shll $4, %r15d
|
||||
addl $79, %r15d
|
||||
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
movb $1, %al
|
||||
movq %rsi, %rdi
|
||||
movq %r14, %rsi
|
||||
callq fprintf
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
sarq $37, %rcx
|
||||
addl %edx, %ecx
|
||||
imull $80, %ecx, %ecx
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
cmpl %ebx, %r15d
|
||||
jne .LBB1_4
|
||||
# BB#3: # %if.then
|
||||
# %bb.3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
callq fputc@PLT
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
addq $1, %rbx
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # %for.end
|
||||
# %bb.5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
callq fputc@PLT
|
||||
movq -48(%rbp), %rax # 8-byte Reload
|
||||
addq $1, %rax
|
||||
addq $6144, %r13 # imm = 0x1800
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6: # %for.end12
|
||||
# %bb.6: # %for.end12
|
||||
addq $8, %rsp
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end1:
|
||||
.size print_array, .Lfunc_end1-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
# -- End function
|
||||
.globl main # -- Begin function main
|
||||
.p2align 4, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp19:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp20:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp21:
|
||||
.cfi_def_cfa_register %rbp
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %for.cond1.preheader.i
|
||||
callq init_array
|
||||
leaq A(%rip), %rax
|
||||
xorl %r10d, %r10d
|
||||
leaq B(%rip), %r8
|
||||
leaq C(%rip), %r9
|
||||
.p2align 4, 0x90
|
||||
.LBB2_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # %for.body3.i
|
||||
# Child Loop BB2_3 Depth 3
|
||||
movq %r8, %rsi
|
||||
xorl %edx, %edx
|
||||
.p2align 4, 0x90
|
||||
.LBB2_2: # %for.body3
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %for.inc17.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4:
|
||||
xorl %r8d, %r8d
|
||||
movl $A, %r9d
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_6 Depth 2
|
||||
# Child Loop BB2_7 Depth 3
|
||||
leaq (%r8,%r8,2), %rdx
|
||||
shlq $11, %rdx
|
||||
leaq C(%rdx), %rsi
|
||||
xorl %edi, %edi
|
||||
.align 16, 0x90
|
||||
.LBB2_6: # %for.body3
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_7 Depth 3
|
||||
movl $0, (%rsi)
|
||||
vxorps %xmm0, %xmm0, %xmm0
|
||||
movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000
|
||||
movq %r9, %rcx
|
||||
.align 16, 0x90
|
||||
.LBB2_7: # %for.body8
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_6 Depth=2
|
||||
# Child Loop BB2_3 Depth 3
|
||||
leaq (%r10,%r10,2), %rcx
|
||||
shlq $11, %rcx
|
||||
addq %r9, %rcx
|
||||
leaq (%rcx,%rdx,4), %r11
|
||||
movl $0, (%rcx,%rdx,4)
|
||||
xorps %xmm0, %xmm0
|
||||
movl $2, %ecx
|
||||
movq %rsi, %rdi
|
||||
.p2align 4, 0x90
|
||||
.LBB2_3: # %for.body8
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# => This Inner Loop Header: Depth=3
|
||||
vmovss (%rcx), %xmm1
|
||||
vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1
|
||||
vaddss %xmm1, %xmm0, %xmm0
|
||||
addq $4, %rcx
|
||||
movss -8(%rax,%rcx,4), %xmm1 # xmm1 = mem[0],zero,zero,zero
|
||||
mulss (%rdi), %xmm1
|
||||
movss -4(%rax,%rcx,4), %xmm2 # xmm2 = mem[0],zero,zero,zero
|
||||
addss %xmm0, %xmm1
|
||||
mulss 6144(%rdi), %xmm2
|
||||
addss %xmm1, %xmm2
|
||||
movss (%rax,%rcx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
mulss 12288(%rdi), %xmm0
|
||||
addss %xmm2, %xmm0
|
||||
addq $3, %rcx
|
||||
addq $18432, %rdi # imm = 0x4800
|
||||
cmpq $1538, %rcx # imm = 0x602
|
||||
jne .LBB2_3
|
||||
# %bb.4: # %for.inc25
|
||||
# in Loop: Header=BB2_2 Depth=2
|
||||
movss %xmm0, (%r11)
|
||||
addq $1, %rdx
|
||||
addq $4, %rsi
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
jne .LBB2_2
|
||||
# %bb.5: # %for.inc28
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
addq $1, %r10
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
jne .LBB2_7
|
||||
# BB#8: # %for.inc25
|
||||
# in Loop: Header=BB2_6 Depth=2
|
||||
vmovss %xmm0, (%rsi)
|
||||
leaq C+4(%rdx,%rdi,4), %rsi
|
||||
incq %rdi
|
||||
cmpq $1536, %rdi # imm = 0x600
|
||||
jne .LBB2_6
|
||||
# BB#9: # %for.inc28
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
addq $6144, %r9 # imm = 0x1800
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB2_5
|
||||
# BB#10: # %for.end30
|
||||
cmpq $1536, %r10 # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# %bb.6: # %for.end30
|
||||
xorl %eax, %eax
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp22:
|
||||
.size main, .Ltmp22-main
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end2:
|
||||
.size main, .Lfunc_end2-main
|
||||
.cfi_endproc
|
||||
|
||||
# -- End function
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
.type B,@object # @B
|
||||
@ -265,10 +237,11 @@ main: # @main
|
||||
.type .L.str,@object # @.str
|
||||
.section .rodata.str1.1,"aMS",@progbits,1
|
||||
.L.str:
|
||||
.asciz "%lf "
|
||||
.asciz "%lf "
|
||||
.size .L.str, 5
|
||||
|
||||
.type C,@object # @C
|
||||
.comm C,9437184,16
|
||||
|
||||
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
|
||||
.section ".note.GNU-stack","",@progbits
|
||||
|
@ -1,385 +1,645 @@
|
||||
.file "matmul.polly.interchanged+tiled+vector.ll"
|
||||
.text
|
||||
.file "matmul.c"
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.p2align 3 # -- Begin function init_array
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.p2align 4, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
leaq B(%rip), %rax
|
||||
leaq A(%rip), %rcx
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB0_1: # %polly.loop_preheader3
|
||||
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
|
||||
xorl %r9d, %r9d
|
||||
.p2align 4, 0x90
|
||||
.LBB0_1: # %polly.loop_header
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB0_2: # %polly.loop_header2
|
||||
movl $1, %edi
|
||||
xorl %edx, %edx
|
||||
.p2align 4, 0x90
|
||||
.LBB0_2: # %polly.loop_header1
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
andl $1022, %esi # imm = 0x3FE
|
||||
orl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, -4(%rcx,%rdi,4)
|
||||
movss %xmm1, -4(%rax,%rdi,4)
|
||||
leal (%r9,%rdx), %esi
|
||||
andl $1023, %esi # imm = 0x3FF
|
||||
addl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, (%rcx,%rdi,4)
|
||||
movss %xmm1, (%rax,%rdi,4)
|
||||
addq $2, %rdi
|
||||
addl %r8d, %edx
|
||||
cmpq $1537, %rdi # imm = 0x601
|
||||
jne .LBB0_2
|
||||
# BB#3: # %polly.loop_exit4
|
||||
# %bb.3: # %polly.loop_exit3
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
addq $1, %r9
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
addq $6144, %rcx # imm = 0x1800
|
||||
addl $2, %r8d
|
||||
cmpq $1536, %r9 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4: # %polly.loop_exit
|
||||
# %bb.4: # %polly.exiting
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end0:
|
||||
.size init_array, .Lfunc_end0-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
# -- End function
|
||||
.globl print_array # -- Begin function print_array
|
||||
.p2align 4, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movb $1, %al
|
||||
callq fprintf
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
sarq $37, %rcx
|
||||
addl %edx, %ecx
|
||||
imull $80, %ecx, %ecx
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
jne .LBB1_4
|
||||
# BB#3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6: # %for.end12
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp20:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp21:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp22:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
subq $56, %rsp
|
||||
.Ltmp23:
|
||||
pushq %rax
|
||||
.cfi_offset %rbx, -56
|
||||
.Ltmp24:
|
||||
.cfi_offset %r12, -48
|
||||
.Ltmp25:
|
||||
.cfi_offset %r13, -40
|
||||
.Ltmp26:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp27:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %ebx, %ebx
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %polly.loop_preheader3.i
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # %polly.loop_header2.i
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %ebx, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %rbx, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %polly.loop_exit4.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
incq %rbx
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4: # %polly.loop_preheader3.preheader
|
||||
movl $C, %edi
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
callq memset
|
||||
xorl %esi, %esi
|
||||
movl $C+16, %eax
|
||||
movq %rax, -88(%rbp) # 8-byte Spill
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %polly.loop_preheader17
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_15 Depth 2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rsi, -56(%rbp) # 8-byte Spill
|
||||
movq %rsi, %rax
|
||||
orq $63, %rax
|
||||
movq %rax, -72(%rbp) # 8-byte Spill
|
||||
leaq -1(%rax), %rax
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
xorl %edx, %edx
|
||||
.align 16, 0x90
|
||||
.LBB2_15: # %polly.loop_preheader24
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rdx, -80(%rbp) # 8-byte Spill
|
||||
leaq -4(%rdx), %rcx
|
||||
movq %rdx, %rax
|
||||
decq %rax
|
||||
cmovsq %rcx, %rax
|
||||
movq %rax, %r15
|
||||
sarq $63, %r15
|
||||
shrq $62, %r15
|
||||
addq %rax, %r15
|
||||
andq $-4, %r15
|
||||
movq %rdx, %r13
|
||||
orq $63, %r13
|
||||
leaq -4(%r13), %rdx
|
||||
xorl %r10d, %r10d
|
||||
movq -88(%rbp), %rax # 8-byte Reload
|
||||
leaq (%rax,%r15,4), %rax
|
||||
movq %rax, -64(%rbp) # 8-byte Spill
|
||||
leaq B+16(,%r15,4), %rbx
|
||||
leaq 4(%r15), %r12
|
||||
.align 16, 0x90
|
||||
.LBB2_8: # %polly.loop_header23
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# => This Loop Header: Depth=3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq -72(%rbp), %rsi # 8-byte Folded Reload
|
||||
jg .LBB2_13
|
||||
# BB#9: # %polly.loop_header30.preheader
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
movq %r10, %rax
|
||||
orq $63, %rax
|
||||
cmpq %rax, %r10
|
||||
jg .LBB2_13
|
||||
# BB#10: # in Loop: Header=BB2_8 Depth=3
|
||||
decq %rax
|
||||
movq -64(%rbp), %r14 # 8-byte Reload
|
||||
movq -56(%rbp), %r11 # 8-byte Reload
|
||||
.align 16, 0x90
|
||||
.LBB2_11: # %polly.loop_header37.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# => This Loop Header: Depth=4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq %r13, %r12
|
||||
movq %rbx, %r8
|
||||
movq %r10, %rsi
|
||||
jg .LBB2_12
|
||||
.align 16, 0x90
|
||||
.LBB2_17: # %polly.loop_header46.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# => This Loop Header: Depth=5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
leaq (%r11,%r11,2), %rcx
|
||||
shlq $11, %rcx
|
||||
vbroadcastss A(%rcx,%rsi,4), %xmm0
|
||||
movq %r14, %rdi
|
||||
movq %r8, %r9
|
||||
movq %r15, %rcx
|
||||
.LBB2_18: # %polly.loop_header46
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# Parent Loop BB2_17 Depth=5
|
||||
# => This Inner Loop Header: Depth=6
|
||||
vmulps (%r9), %xmm0, %xmm1
|
||||
vaddps (%rdi), %xmm1, %xmm1
|
||||
vmovaps %xmm1, (%rdi)
|
||||
addq $16, %rdi
|
||||
addq $16, %r9
|
||||
addq $4, %rcx
|
||||
cmpq %rdx, %rcx
|
||||
jle .LBB2_18
|
||||
# BB#16: # %polly.loop_exit48
|
||||
# in Loop: Header=BB2_17 Depth=5
|
||||
addq $6144, %r8 # imm = 0x1800
|
||||
cmpq %rax, %rsi
|
||||
leaq 1(%rsi), %rsi
|
||||
jle .LBB2_17
|
||||
.align 16, 0x90
|
||||
.LBB2_12: # %polly.loop_exit39
|
||||
# in Loop: Header=BB2_11 Depth=4
|
||||
addq $6144, %r14 # imm = 0x1800
|
||||
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
|
||||
leaq 1(%r11), %r11
|
||||
jle .LBB2_11
|
||||
.align 16, 0x90
|
||||
.LBB2_13: # %polly.loop_exit32
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
addq $393216, %rbx # imm = 0x60000
|
||||
cmpq $1472, %r10 # imm = 0x5C0
|
||||
leaq 64(%r10), %r10
|
||||
movq -56(%rbp), %rsi # 8-byte Reload
|
||||
jl .LBB2_8
|
||||
# BB#14: # %polly.loop_exit25
|
||||
# in Loop: Header=BB2_15 Depth=2
|
||||
movq -80(%rbp), %rdx # 8-byte Reload
|
||||
cmpq $1472, %rdx # imm = 0x5C0
|
||||
leaq 64(%rdx), %rdx
|
||||
jl .LBB2_15
|
||||
# BB#6: # %polly.loop_exit18
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
addq $393216, -88(%rbp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
cmpq $1472, %rsi # imm = 0x5C0
|
||||
leaq 64(%rsi), %rsi
|
||||
jl .LBB2_5
|
||||
# BB#7: # %polly.loop_exit11
|
||||
leaq C(%rip), %r13
|
||||
xorl %eax, %eax
|
||||
addq $56, %rsp
|
||||
movl $3435973837, %r12d # imm = 0xCCCCCCCD
|
||||
leaq .L.str(%rip), %r14
|
||||
.p2align 4, 0x90
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
movq stdout(%rip), %rsi
|
||||
xorl %ebx, %ebx
|
||||
.p2align 4, 0x90
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ebx, %eax
|
||||
imulq %r12, %rax
|
||||
shrq $38, %rax
|
||||
leal (%rax,%rax,4), %r15d
|
||||
shll $4, %r15d
|
||||
addl $79, %r15d
|
||||
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
movb $1, %al
|
||||
movq %rsi, %rdi
|
||||
movq %r14, %rsi
|
||||
callq fprintf
|
||||
cmpl %ebx, %r15d
|
||||
jne .LBB1_4
|
||||
# %bb.3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc@PLT
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $1, %rbx
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# %bb.5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
callq fputc@PLT
|
||||
movq -48(%rbp), %rax # 8-byte Reload
|
||||
addq $1, %rax
|
||||
addq $6144, %r13 # imm = 0x1800
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# %bb.6: # %for.end12
|
||||
addq $8, %rsp
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp28:
|
||||
.size main, .Ltmp28-main
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end1:
|
||||
.size print_array, .Lfunc_end1-print_array
|
||||
.cfi_endproc
|
||||
|
||||
# -- End function
|
||||
.globl main # -- Begin function main
|
||||
.p2align 4, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
subq $264, %rsp # imm = 0x108
|
||||
.cfi_offset %rbx, -56
|
||||
.cfi_offset %r12, -48
|
||||
.cfi_offset %r13, -40
|
||||
.cfi_offset %r14, -32
|
||||
.cfi_offset %r15, -24
|
||||
callq init_array
|
||||
leaq C(%rip), %rdi
|
||||
xorl %eax, %eax
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
callq memset@PLT
|
||||
movl $64, %eax
|
||||
movq %rax, -80(%rbp) # 8-byte Spill
|
||||
leaq A(%rip), %rax
|
||||
movq %rax, -72(%rbp) # 8-byte Spill
|
||||
.p2align 4, 0x90
|
||||
.LBB2_1: # %polly.loop_header8
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
# Child Loop BB2_3 Depth 3
|
||||
# Child Loop BB2_4 Depth 4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
leaq B+192(%rip), %r9
|
||||
xorl %edi, %edi
|
||||
xorl %eax, %eax
|
||||
.p2align 4, 0x90
|
||||
.LBB2_2: # %polly.loop_header14
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_3 Depth 3
|
||||
# Child Loop BB2_4 Depth 4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
movq %rax, -168(%rbp) # 8-byte Spill
|
||||
movq %rdi, -176(%rbp) # 8-byte Spill
|
||||
shlq $6, %rdi
|
||||
leaq 16(%rdi), %rdx
|
||||
leaq 32(%rdi), %rsi
|
||||
leaq 48(%rdi), %rcx
|
||||
movq -72(%rbp), %r12 # 8-byte Reload
|
||||
movq %r9, -184(%rbp) # 8-byte Spill
|
||||
xorl %eax, %eax
|
||||
.p2align 4, 0x90
|
||||
.LBB2_3: # %polly.loop_header20
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# => This Loop Header: Depth=3
|
||||
# Child Loop BB2_4 Depth 4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
movq %rax, -192(%rbp) # 8-byte Spill
|
||||
movq %r12, -200(%rbp) # 8-byte Spill
|
||||
movq -48(%rbp), %r14 # 8-byte Reload
|
||||
.p2align 4, 0x90
|
||||
.LBB2_4: # %polly.loop_header26
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# Parent Loop BB2_3 Depth=3
|
||||
# => This Loop Header: Depth=4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
leaq (%r14,%r14,2), %rbx
|
||||
shlq $11, %rbx
|
||||
leaq C(%rip), %rax
|
||||
addq %rax, %rbx
|
||||
leaq (%rbx,%rdi,4), %r8
|
||||
leaq (%rbx,%rdx,4), %r15
|
||||
leaq (%rbx,%rsi,4), %r10
|
||||
leaq (%rbx,%rcx,4), %r11
|
||||
movups (%rbx,%rdi,4), %xmm8
|
||||
movups 16(%rbx,%rdi,4), %xmm0
|
||||
movaps %xmm0, -144(%rbp) # 16-byte Spill
|
||||
movups 32(%rbx,%rdi,4), %xmm6
|
||||
movups 48(%rbx,%rdi,4), %xmm1
|
||||
movups (%rbx,%rdx,4), %xmm15
|
||||
movups 16(%rbx,%rdx,4), %xmm0
|
||||
movaps %xmm0, -64(%rbp) # 16-byte Spill
|
||||
movups 32(%rbx,%rdx,4), %xmm0
|
||||
movaps %xmm0, -96(%rbp) # 16-byte Spill
|
||||
movups 48(%rbx,%rdx,4), %xmm0
|
||||
movaps %xmm0, -112(%rbp) # 16-byte Spill
|
||||
movups (%rbx,%rsi,4), %xmm11
|
||||
movups 16(%rbx,%rsi,4), %xmm0
|
||||
movaps %xmm0, -160(%rbp) # 16-byte Spill
|
||||
movups 32(%rbx,%rsi,4), %xmm12
|
||||
movups 48(%rbx,%rsi,4), %xmm0
|
||||
movaps %xmm0, -128(%rbp) # 16-byte Spill
|
||||
movups (%rbx,%rcx,4), %xmm9
|
||||
movups 16(%rbx,%rcx,4), %xmm13
|
||||
movups 32(%rbx,%rcx,4), %xmm2
|
||||
movups 48(%rbx,%rcx,4), %xmm3
|
||||
movq %r9, %rbx
|
||||
movl $0, %r13d
|
||||
.p2align 4, 0x90
|
||||
.LBB2_5: # %vector.ph
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# Parent Loop BB2_3 Depth=3
|
||||
# Parent Loop BB2_4 Depth=4
|
||||
# => This Inner Loop Header: Depth=5
|
||||
movaps %xmm12, -240(%rbp) # 16-byte Spill
|
||||
movaps %xmm2, -256(%rbp) # 16-byte Spill
|
||||
movaps %xmm3, -272(%rbp) # 16-byte Spill
|
||||
movaps %xmm8, %xmm10
|
||||
movaps -144(%rbp), %xmm7 # 16-byte Reload
|
||||
unpcklps %xmm7, %xmm10 # xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
|
||||
movaps %xmm1, %xmm4
|
||||
shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
|
||||
shufps $36, %xmm4, %xmm10 # xmm10 = xmm10[0,1],xmm4[2,0]
|
||||
movaps %xmm7, %xmm5
|
||||
shufps $17, %xmm8, %xmm5 # xmm5 = xmm5[1,0],xmm8[1,0]
|
||||
movaps %xmm6, %xmm4
|
||||
unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
shufps $226, %xmm4, %xmm5 # xmm5 = xmm5[2,0],xmm4[2,3]
|
||||
movaps %xmm8, %xmm12
|
||||
unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
|
||||
movaps %xmm1, %xmm4
|
||||
shufps $34, %xmm6, %xmm4 # xmm4 = xmm4[2,0],xmm6[2,0]
|
||||
shufps $36, %xmm4, %xmm12 # xmm12 = xmm12[0,1],xmm4[2,0]
|
||||
shufps $51, %xmm8, %xmm7 # xmm7 = xmm7[3,0],xmm8[3,0]
|
||||
unpckhps %xmm1, %xmm6 # xmm6 = xmm6[2],xmm1[2],xmm6[3],xmm1[3]
|
||||
shufps $226, %xmm6, %xmm7 # xmm7 = xmm7[2,0],xmm6[2,3]
|
||||
movaps -160(%rbx), %xmm0
|
||||
movaps -144(%rbx), %xmm1
|
||||
movaps %xmm1, %xmm6
|
||||
shufps $0, %xmm0, %xmm6 # xmm6 = xmm6[0,0],xmm0[0,0]
|
||||
movaps -192(%rbx), %xmm3
|
||||
movaps -176(%rbx), %xmm4
|
||||
movaps %xmm3, %xmm8
|
||||
unpcklps %xmm4, %xmm8 # xmm8 = xmm8[0],xmm4[0],xmm8[1],xmm4[1]
|
||||
shufps $36, %xmm6, %xmm8 # xmm8 = xmm8[0,1],xmm6[2,0]
|
||||
movaps %xmm0, %xmm2
|
||||
unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
movaps %xmm4, %xmm6
|
||||
shufps $17, %xmm3, %xmm6 # xmm6 = xmm6[1,0],xmm3[1,0]
|
||||
shufps $226, %xmm2, %xmm6 # xmm6 = xmm6[2,0],xmm2[2,3]
|
||||
movaps %xmm1, %xmm2
|
||||
shufps $34, %xmm0, %xmm2 # xmm2 = xmm2[2,0],xmm0[2,0]
|
||||
movaps %xmm3, %xmm14
|
||||
unpckhps %xmm4, %xmm14 # xmm14 = xmm14[2],xmm4[2],xmm14[3],xmm4[3]
|
||||
shufps $36, %xmm2, %xmm14 # xmm14 = xmm14[0,1],xmm2[2,0]
|
||||
unpckhps %xmm1, %xmm0 # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
shufps $51, %xmm3, %xmm4 # xmm4 = xmm4[3,0],xmm3[3,0]
|
||||
shufps $226, %xmm0, %xmm4 # xmm4 = xmm4[2,0],xmm0[2,3]
|
||||
movss (%r12,%r13,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
|
||||
mulps %xmm0, %xmm8
|
||||
addps %xmm10, %xmm8
|
||||
mulps %xmm0, %xmm6
|
||||
addps %xmm5, %xmm6
|
||||
mulps %xmm0, %xmm14
|
||||
addps %xmm12, %xmm14
|
||||
mulps %xmm0, %xmm4
|
||||
movaps %xmm0, %xmm5
|
||||
addps %xmm7, %xmm4
|
||||
movaps %xmm14, %xmm0
|
||||
unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
|
||||
movaps %xmm6, %xmm1
|
||||
shufps $51, %xmm8, %xmm1 # xmm1 = xmm1[3,0],xmm8[3,0]
|
||||
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
|
||||
movaps %xmm1, -304(%rbp) # 16-byte Spill
|
||||
movaps %xmm4, %xmm0
|
||||
shufps $34, %xmm14, %xmm0 # xmm0 = xmm0[2,0],xmm14[2,0]
|
||||
movaps %xmm8, %xmm1
|
||||
unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
|
||||
shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
|
||||
movaps %xmm1, -288(%rbp) # 16-byte Spill
|
||||
movaps %xmm14, %xmm0
|
||||
unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
|
||||
movaps %xmm6, %xmm1
|
||||
shufps $17, %xmm8, %xmm1 # xmm1 = xmm1[1,0],xmm8[1,0]
|
||||
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
|
||||
movaps %xmm1, -144(%rbp) # 16-byte Spill
|
||||
shufps $0, %xmm14, %xmm4 # xmm4 = xmm4[0,0],xmm14[0,0]
|
||||
unpcklps %xmm6, %xmm8 # xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
|
||||
shufps $36, %xmm4, %xmm8 # xmm8 = xmm8[0,1],xmm4[2,0]
|
||||
movaps %xmm15, %xmm14
|
||||
movaps -64(%rbp), %xmm4 # 16-byte Reload
|
||||
unpcklps %xmm4, %xmm14 # xmm14 = xmm14[0],xmm4[0],xmm14[1],xmm4[1]
|
||||
movaps -112(%rbp), %xmm1 # 16-byte Reload
|
||||
movaps %xmm1, %xmm0
|
||||
movaps -96(%rbp), %xmm3 # 16-byte Reload
|
||||
shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
|
||||
shufps $36, %xmm0, %xmm14 # xmm14 = xmm14[0,1],xmm0[2,0]
|
||||
movaps %xmm4, %xmm12
|
||||
shufps $17, %xmm15, %xmm12 # xmm12 = xmm12[1,0],xmm15[1,0]
|
||||
movaps %xmm3, %xmm2
|
||||
unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
shufps $226, %xmm2, %xmm12 # xmm12 = xmm12[2,0],xmm2[2,3]
|
||||
movaps %xmm15, %xmm7
|
||||
unpckhps %xmm4, %xmm7 # xmm7 = xmm7[2],xmm4[2],xmm7[3],xmm4[3]
|
||||
movaps %xmm1, %xmm2
|
||||
shufps $34, %xmm3, %xmm2 # xmm2 = xmm2[2,0],xmm3[2,0]
|
||||
shufps $36, %xmm2, %xmm7 # xmm7 = xmm7[0,1],xmm2[2,0]
|
||||
shufps $51, %xmm15, %xmm4 # xmm4 = xmm4[3,0],xmm15[3,0]
|
||||
unpckhps %xmm1, %xmm3 # xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
|
||||
shufps $226, %xmm3, %xmm4 # xmm4 = xmm4[2,0],xmm3[2,3]
|
||||
movaps %xmm4, -64(%rbp) # 16-byte Spill
|
||||
movaps -96(%rbx), %xmm2
|
||||
movaps -80(%rbx), %xmm1
|
||||
movaps %xmm1, %xmm4
|
||||
shufps $0, %xmm2, %xmm4 # xmm4 = xmm4[0,0],xmm2[0,0]
|
||||
movaps -112(%rbx), %xmm10
|
||||
movaps -128(%rbx), %xmm0
|
||||
movaps %xmm0, %xmm15
|
||||
unpcklps %xmm10, %xmm15 # xmm15 = xmm15[0],xmm10[0],xmm15[1],xmm10[1]
|
||||
shufps $36, %xmm4, %xmm15 # xmm15 = xmm15[0,1],xmm4[2,0]
|
||||
movaps %xmm2, %xmm4
|
||||
unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
movaps %xmm10, %xmm6
|
||||
shufps $17, %xmm0, %xmm6 # xmm6 = xmm6[1,0],xmm0[1,0]
|
||||
shufps $226, %xmm4, %xmm6 # xmm6 = xmm6[2,0],xmm4[2,3]
|
||||
movaps %xmm1, %xmm3
|
||||
shufps $34, %xmm2, %xmm3 # xmm3 = xmm3[2,0],xmm2[2,0]
|
||||
movaps %xmm0, %xmm4
|
||||
unpckhps %xmm10, %xmm4 # xmm4 = xmm4[2],xmm10[2],xmm4[3],xmm10[3]
|
||||
shufps $36, %xmm3, %xmm4 # xmm4 = xmm4[0,1],xmm3[2,0]
|
||||
unpckhps %xmm1, %xmm2 # xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
|
||||
shufps $51, %xmm0, %xmm10 # xmm10 = xmm10[3,0],xmm0[3,0]
|
||||
shufps $226, %xmm2, %xmm10 # xmm10 = xmm10[2,0],xmm2[2,3]
|
||||
movaps %xmm5, -224(%rbp) # 16-byte Spill
|
||||
mulps %xmm5, %xmm15
|
||||
addps %xmm14, %xmm15
|
||||
mulps %xmm5, %xmm6
|
||||
addps %xmm12, %xmm6
|
||||
mulps %xmm5, %xmm4
|
||||
addps %xmm7, %xmm4
|
||||
mulps %xmm5, %xmm10
|
||||
addps -64(%rbp), %xmm10 # 16-byte Folded Reload
|
||||
movaps %xmm4, %xmm0
|
||||
unpckhps %xmm10, %xmm0 # xmm0 = xmm0[2],xmm10[2],xmm0[3],xmm10[3]
|
||||
movaps %xmm6, %xmm1
|
||||
shufps $51, %xmm15, %xmm1 # xmm1 = xmm1[3,0],xmm15[3,0]
|
||||
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
|
||||
movaps %xmm1, -112(%rbp) # 16-byte Spill
|
||||
movaps %xmm10, %xmm0
|
||||
shufps $34, %xmm4, %xmm0 # xmm0 = xmm0[2,0],xmm4[2,0]
|
||||
movaps %xmm15, %xmm1
|
||||
unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
|
||||
shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
|
||||
movaps %xmm1, -96(%rbp) # 16-byte Spill
|
||||
movaps %xmm4, %xmm0
|
||||
unpcklps %xmm10, %xmm0 # xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
|
||||
movaps %xmm6, %xmm1
|
||||
shufps $17, %xmm15, %xmm1 # xmm1 = xmm1[1,0],xmm15[1,0]
|
||||
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
|
||||
movaps %xmm1, -64(%rbp) # 16-byte Spill
|
||||
shufps $0, %xmm4, %xmm10 # xmm10 = xmm10[0,0],xmm4[0,0]
|
||||
unpcklps %xmm6, %xmm15 # xmm15 = xmm15[0],xmm6[0],xmm15[1],xmm6[1]
|
||||
shufps $36, %xmm10, %xmm15 # xmm15 = xmm15[0,1],xmm10[2,0]
|
||||
movaps %xmm11, %xmm10
|
||||
movaps -160(%rbp), %xmm14 # 16-byte Reload
|
||||
unpcklps %xmm14, %xmm10 # xmm10 = xmm10[0],xmm14[0],xmm10[1],xmm14[1]
|
||||
movaps -128(%rbp), %xmm2 # 16-byte Reload
|
||||
movaps %xmm2, %xmm0
|
||||
movaps -240(%rbp), %xmm3 # 16-byte Reload
|
||||
shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
|
||||
shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
|
||||
movaps %xmm14, %xmm12
|
||||
shufps $17, %xmm11, %xmm12 # xmm12 = xmm12[1,0],xmm11[1,0]
|
||||
movaps %xmm3, %xmm0
|
||||
unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
shufps $226, %xmm0, %xmm12 # xmm12 = xmm12[2,0],xmm0[2,3]
|
||||
movaps %xmm11, %xmm0
|
||||
unpckhps %xmm14, %xmm0 # xmm0 = xmm0[2],xmm14[2],xmm0[3],xmm14[3]
|
||||
movaps %xmm2, %xmm1
|
||||
shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
|
||||
shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
|
||||
shufps $51, %xmm11, %xmm14 # xmm14 = xmm14[3,0],xmm11[3,0]
|
||||
unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
|
||||
shufps $226, %xmm3, %xmm14 # xmm14 = xmm14[2,0],xmm3[2,3]
|
||||
movaps -32(%rbx), %xmm1
|
||||
movaps -16(%rbx), %xmm2
|
||||
movaps %xmm2, %xmm3
|
||||
shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
|
||||
movaps -48(%rbx), %xmm4
|
||||
movaps -64(%rbx), %xmm5
|
||||
movaps %xmm5, %xmm11
|
||||
unpcklps %xmm4, %xmm11 # xmm11 = xmm11[0],xmm4[0],xmm11[1],xmm4[1]
|
||||
shufps $36, %xmm3, %xmm11 # xmm11 = xmm11[0,1],xmm3[2,0]
|
||||
movaps %xmm1, %xmm3
|
||||
unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
|
||||
movaps %xmm4, %xmm7
|
||||
shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
|
||||
shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
|
||||
movaps %xmm2, %xmm3
|
||||
shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
|
||||
movaps %xmm5, %xmm6
|
||||
unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
|
||||
shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
|
||||
unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
|
||||
shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
|
||||
movaps -224(%rbp), %xmm1 # 16-byte Reload
|
||||
mulps %xmm1, %xmm11
|
||||
addps %xmm10, %xmm11
|
||||
mulps %xmm1, %xmm7
|
||||
addps %xmm12, %xmm7
|
||||
mulps %xmm1, %xmm6
|
||||
addps %xmm0, %xmm6
|
||||
mulps %xmm1, %xmm4
|
||||
addps %xmm14, %xmm4
|
||||
movaps %xmm6, %xmm0
|
||||
unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
|
||||
movaps %xmm7, %xmm1
|
||||
shufps $51, %xmm11, %xmm1 # xmm1 = xmm1[3,0],xmm11[3,0]
|
||||
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
|
||||
movaps %xmm1, -128(%rbp) # 16-byte Spill
|
||||
movaps %xmm4, %xmm0
|
||||
shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
|
||||
movaps %xmm11, %xmm12
|
||||
unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
|
||||
shufps $36, %xmm0, %xmm12 # xmm12 = xmm12[0,1],xmm0[2,0]
|
||||
movaps %xmm6, %xmm0
|
||||
unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
|
||||
movaps %xmm7, %xmm1
|
||||
shufps $17, %xmm11, %xmm1 # xmm1 = xmm1[1,0],xmm11[1,0]
|
||||
shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
|
||||
movaps %xmm1, -160(%rbp) # 16-byte Spill
|
||||
shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
|
||||
unpcklps %xmm7, %xmm11 # xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1]
|
||||
shufps $36, %xmm4, %xmm11 # xmm11 = xmm11[0,1],xmm4[2,0]
|
||||
movaps %xmm9, %xmm10
|
||||
unpcklps %xmm13, %xmm10 # xmm10 = xmm10[0],xmm13[0],xmm10[1],xmm13[1]
|
||||
movaps -272(%rbp), %xmm2 # 16-byte Reload
|
||||
movaps %xmm2, %xmm0
|
||||
movaps -256(%rbp), %xmm3 # 16-byte Reload
|
||||
shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
|
||||
shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
|
||||
movaps %xmm13, %xmm14
|
||||
shufps $17, %xmm9, %xmm14 # xmm14 = xmm14[1,0],xmm9[1,0]
|
||||
movaps %xmm3, %xmm0
|
||||
unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
shufps $226, %xmm0, %xmm14 # xmm14 = xmm14[2,0],xmm0[2,3]
|
||||
movaps %xmm9, %xmm0
|
||||
unpckhps %xmm13, %xmm0 # xmm0 = xmm0[2],xmm13[2],xmm0[3],xmm13[3]
|
||||
movaps %xmm2, %xmm1
|
||||
shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
|
||||
shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
|
||||
shufps $51, %xmm9, %xmm13 # xmm13 = xmm13[3,0],xmm9[3,0]
|
||||
unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
|
||||
shufps $226, %xmm3, %xmm13 # xmm13 = xmm13[2,0],xmm3[2,3]
|
||||
movaps 32(%rbx), %xmm1
|
||||
movaps 48(%rbx), %xmm2
|
||||
movaps %xmm2, %xmm3
|
||||
shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
|
||||
movaps 16(%rbx), %xmm4
|
||||
movaps (%rbx), %xmm5
|
||||
movaps %xmm5, %xmm9
|
||||
unpcklps %xmm4, %xmm9 # xmm9 = xmm9[0],xmm4[0],xmm9[1],xmm4[1]
|
||||
shufps $36, %xmm3, %xmm9 # xmm9 = xmm9[0,1],xmm3[2,0]
|
||||
movaps %xmm1, %xmm3
|
||||
unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
|
||||
movaps %xmm4, %xmm7
|
||||
shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
|
||||
shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
|
||||
movaps %xmm2, %xmm3
|
||||
shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
|
||||
movaps %xmm5, %xmm6
|
||||
unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
|
||||
shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
|
||||
unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
|
||||
shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
|
||||
movaps -224(%rbp), %xmm1 # 16-byte Reload
|
||||
mulps %xmm1, %xmm9
|
||||
addps %xmm10, %xmm9
|
||||
mulps %xmm1, %xmm7
|
||||
addps %xmm14, %xmm7
|
||||
mulps %xmm1, %xmm6
|
||||
addps %xmm0, %xmm6
|
||||
mulps %xmm1, %xmm4
|
||||
addps %xmm13, %xmm4
|
||||
movaps %xmm6, %xmm0
|
||||
unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
|
||||
movaps %xmm7, %xmm3
|
||||
shufps $51, %xmm9, %xmm3 # xmm3 = xmm3[3,0],xmm9[3,0]
|
||||
shufps $226, %xmm0, %xmm3 # xmm3 = xmm3[2,0],xmm0[2,3]
|
||||
movaps %xmm4, %xmm0
|
||||
shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
|
||||
movaps %xmm9, %xmm2
|
||||
unpckhps %xmm7, %xmm2 # xmm2 = xmm2[2],xmm7[2],xmm2[3],xmm7[3]
|
||||
shufps $36, %xmm0, %xmm2 # xmm2 = xmm2[0,1],xmm0[2,0]
|
||||
movaps %xmm6, %xmm0
|
||||
unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
|
||||
movaps %xmm7, %xmm13
|
||||
shufps $17, %xmm9, %xmm13 # xmm13 = xmm13[1,0],xmm9[1,0]
|
||||
shufps $226, %xmm0, %xmm13 # xmm13 = xmm13[2,0],xmm0[2,3]
|
||||
shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
|
||||
movaps -288(%rbp), %xmm6 # 16-byte Reload
|
||||
movaps -304(%rbp), %xmm1 # 16-byte Reload
|
||||
unpcklps %xmm7, %xmm9 # xmm9 = xmm9[0],xmm7[0],xmm9[1],xmm7[1]
|
||||
shufps $36, %xmm4, %xmm9 # xmm9 = xmm9[0,1],xmm4[2,0]
|
||||
addq $1, %r13
|
||||
addq $6144, %rbx # imm = 0x1800
|
||||
cmpq $64, %r13
|
||||
jne .LBB2_5
|
||||
# %bb.6: # %polly.loop_exit34
|
||||
# in Loop: Header=BB2_4 Depth=4
|
||||
movups %xmm8, (%r8)
|
||||
movaps -144(%rbp), %xmm0 # 16-byte Reload
|
||||
movups %xmm0, 16(%r8)
|
||||
movups %xmm6, 32(%r8)
|
||||
movups %xmm1, 48(%r8)
|
||||
movaps -112(%rbp), %xmm0 # 16-byte Reload
|
||||
movups %xmm0, 48(%r15)
|
||||
movaps -96(%rbp), %xmm0 # 16-byte Reload
|
||||
movups %xmm0, 32(%r15)
|
||||
movaps -64(%rbp), %xmm0 # 16-byte Reload
|
||||
movups %xmm0, 16(%r15)
|
||||
movups %xmm15, (%r15)
|
||||
movaps -128(%rbp), %xmm0 # 16-byte Reload
|
||||
movups %xmm0, 48(%r10)
|
||||
movaps -160(%rbp), %xmm0 # 16-byte Reload
|
||||
movups %xmm0, 16(%r10)
|
||||
movups %xmm11, (%r10)
|
||||
movups %xmm12, 32(%r10)
|
||||
movups %xmm3, 48(%r11)
|
||||
movups %xmm13, 16(%r11)
|
||||
movups %xmm9, (%r11)
|
||||
movups %xmm2, 32(%r11)
|
||||
addq $1, %r14
|
||||
addq $6144, %r12 # imm = 0x1800
|
||||
cmpq -80(%rbp), %r14 # 8-byte Folded Reload
|
||||
jne .LBB2_4
|
||||
# %bb.7: # %polly.loop_exit28
|
||||
# in Loop: Header=BB2_3 Depth=3
|
||||
movq -192(%rbp), %rax # 8-byte Reload
|
||||
addq $64, %rax
|
||||
addq $393216, %r9 # imm = 0x60000
|
||||
movq -200(%rbp), %r12 # 8-byte Reload
|
||||
addq $256, %r12 # imm = 0x100
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jb .LBB2_3
|
||||
# %bb.8: # %polly.loop_exit22
|
||||
# in Loop: Header=BB2_2 Depth=2
|
||||
movq -168(%rbp), %rax # 8-byte Reload
|
||||
addq $64, %rax
|
||||
movq -176(%rbp), %rdi # 8-byte Reload
|
||||
addq $1, %rdi
|
||||
movq -184(%rbp), %r9 # 8-byte Reload
|
||||
addq $256, %r9 # imm = 0x100
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jb .LBB2_2
|
||||
# %bb.9: # %polly.loop_exit16
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
movq -48(%rbp), %rax # 8-byte Reload
|
||||
movq %rax, %rcx
|
||||
addq $64, %rcx
|
||||
addq $64, -80(%rbp) # 8-byte Folded Spill
|
||||
addq $393216, -72(%rbp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
movq %rcx, %rax
|
||||
movq %rcx, -48(%rbp) # 8-byte Spill
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
jb .LBB2_1
|
||||
# %bb.10: # %polly.exiting
|
||||
xorl %eax, %eax
|
||||
addq $264, %rsp # imm = 0x108
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end2:
|
||||
.size main, .Lfunc_end2-main
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
.type B,@object # @B
|
||||
@ -387,10 +647,11 @@ main: # @main
|
||||
.type .L.str,@object # @.str
|
||||
.section .rodata.str1.1,"aMS",@progbits,1
|
||||
.L.str:
|
||||
.asciz "%lf "
|
||||
.asciz "%lf "
|
||||
.size .L.str, 5
|
||||
|
||||
.type C,@object # @C
|
||||
.comm C,9437184,16
|
||||
|
||||
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
|
||||
.section ".note.GNU-stack","",@progbits
|
||||
|
@ -1,379 +1,495 @@
|
||||
.file "matmul.polly.interchanged+tiled.ll"
|
||||
.text
|
||||
.file "matmul.c"
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.p2align 3 # -- Begin function init_array
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.p2align 4, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
leaq B(%rip), %rax
|
||||
leaq A(%rip), %rcx
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB0_1: # %polly.loop_preheader3
|
||||
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
|
||||
xorl %r9d, %r9d
|
||||
.p2align 4, 0x90
|
||||
.LBB0_1: # %polly.loop_header
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB0_2: # %polly.loop_header2
|
||||
movl $1, %edi
|
||||
xorl %edx, %edx
|
||||
.p2align 4, 0x90
|
||||
.LBB0_2: # %polly.loop_header1
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
andl $1022, %esi # imm = 0x3FE
|
||||
orl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, -4(%rcx,%rdi,4)
|
||||
movss %xmm1, -4(%rax,%rdi,4)
|
||||
leal (%r9,%rdx), %esi
|
||||
andl $1023, %esi # imm = 0x3FF
|
||||
addl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, (%rcx,%rdi,4)
|
||||
movss %xmm1, (%rax,%rdi,4)
|
||||
addq $2, %rdi
|
||||
addl %r8d, %edx
|
||||
cmpq $1537, %rdi # imm = 0x601
|
||||
jne .LBB0_2
|
||||
# BB#3: # %polly.loop_exit4
|
||||
# %bb.3: # %polly.loop_exit3
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
addq $1, %r9
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
addq $6144, %rcx # imm = 0x1800
|
||||
addl $2, %r8d
|
||||
cmpq $1536, %r9 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4: # %polly.loop_exit
|
||||
# %bb.4: # %polly.exiting
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end0:
|
||||
.size init_array, .Lfunc_end0-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
# -- End function
|
||||
.globl print_array # -- Begin function print_array
|
||||
.p2align 4, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movb $1, %al
|
||||
callq fprintf
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
sarq $37, %rcx
|
||||
addl %edx, %ecx
|
||||
imull $80, %ecx, %ecx
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
jne .LBB1_4
|
||||
# BB#3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6: # %for.end12
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp20:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp21:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp22:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
subq $56, %rsp
|
||||
.Ltmp23:
|
||||
pushq %rax
|
||||
.cfi_offset %rbx, -56
|
||||
.Ltmp24:
|
||||
.cfi_offset %r12, -48
|
||||
.Ltmp25:
|
||||
.cfi_offset %r13, -40
|
||||
.Ltmp26:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp27:
|
||||
.cfi_offset %r15, -24
|
||||
leaq C(%rip), %r13
|
||||
xorl %eax, %eax
|
||||
movl $3435973837, %r12d # imm = 0xCCCCCCCD
|
||||
leaq .L.str(%rip), %r14
|
||||
.p2align 4, 0x90
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
movq stdout(%rip), %rsi
|
||||
xorl %ebx, %ebx
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %polly.loop_preheader3.i
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # %polly.loop_header2.i
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
.p2align 4, 0x90
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %ebx, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %rbx, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %polly.loop_exit4.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
incq %rbx
|
||||
movl %ebx, %eax
|
||||
imulq %r12, %rax
|
||||
shrq $38, %rax
|
||||
leal (%rax,%rax,4), %r15d
|
||||
shll $4, %r15d
|
||||
addl $79, %r15d
|
||||
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
movb $1, %al
|
||||
movq %rsi, %rdi
|
||||
movq %r14, %rsi
|
||||
callq fprintf
|
||||
cmpl %ebx, %r15d
|
||||
jne .LBB1_4
|
||||
# %bb.3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc@PLT
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $1, %rbx
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4: # %polly.loop_preheader3.preheader
|
||||
movl $C, %ebx
|
||||
movl $C, %edi
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
callq memset
|
||||
xorl %eax, %eax
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %polly.loop_preheader17
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_15 Depth 2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rax, -56(%rbp) # 8-byte Spill
|
||||
movq %rbx, -88(%rbp) # 8-byte Spill
|
||||
movq %rax, %rcx
|
||||
orq $63, %rcx
|
||||
movq %rcx, -72(%rbp) # 8-byte Spill
|
||||
leaq -1(%rcx), %rcx
|
||||
movq %rcx, -48(%rbp) # 8-byte Spill
|
||||
movq $-1, %r15
|
||||
movl $B, %ecx
|
||||
movq %rbx, -64(%rbp) # 8-byte Spill
|
||||
xorl %r12d, %r12d
|
||||
.align 16, 0x90
|
||||
.LBB2_15: # %polly.loop_preheader24
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rcx, -80(%rbp) # 8-byte Spill
|
||||
movq %r12, %r13
|
||||
orq $63, %r13
|
||||
leaq -1(%r13), %rbx
|
||||
xorl %r9d, %r9d
|
||||
movq %rcx, %rdx
|
||||
.align 16, 0x90
|
||||
.LBB2_8: # %polly.loop_header23
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# => This Loop Header: Depth=3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq -72(%rbp), %rax # 8-byte Folded Reload
|
||||
jg .LBB2_13
|
||||
# BB#9: # %polly.loop_header30.preheader
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
movq %r9, %rax
|
||||
orq $63, %rax
|
||||
cmpq %rax, %r9
|
||||
jg .LBB2_13
|
||||
# BB#10: # in Loop: Header=BB2_8 Depth=3
|
||||
decq %rax
|
||||
movq -64(%rbp), %r10 # 8-byte Reload
|
||||
movq -56(%rbp), %r11 # 8-byte Reload
|
||||
.align 16, 0x90
|
||||
.LBB2_11: # %polly.loop_header37.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# => This Loop Header: Depth=4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq %r13, %r12
|
||||
movq %rdx, %r14
|
||||
movq %r9, %rcx
|
||||
jg .LBB2_12
|
||||
.align 16, 0x90
|
||||
.LBB2_17: # %polly.loop_header46.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# => This Loop Header: Depth=5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
leaq (%r11,%r11,2), %rsi
|
||||
shlq $11, %rsi
|
||||
vmovss A(%rsi,%rcx,4), %xmm0
|
||||
movq %r10, %rdi
|
||||
movq %r14, %r8
|
||||
movq %r15, %rsi
|
||||
.LBB2_18: # %polly.loop_header46
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# Parent Loop BB2_17 Depth=5
|
||||
# => This Inner Loop Header: Depth=6
|
||||
vmulss (%r8), %xmm0, %xmm1
|
||||
vaddss (%rdi), %xmm1, %xmm1
|
||||
vmovss %xmm1, (%rdi)
|
||||
addq $4, %rdi
|
||||
addq $4, %r8
|
||||
incq %rsi
|
||||
cmpq %rbx, %rsi
|
||||
jle .LBB2_18
|
||||
# BB#16: # %polly.loop_exit48
|
||||
# in Loop: Header=BB2_17 Depth=5
|
||||
addq $6144, %r14 # imm = 0x1800
|
||||
cmpq %rax, %rcx
|
||||
leaq 1(%rcx), %rcx
|
||||
jle .LBB2_17
|
||||
.align 16, 0x90
|
||||
.LBB2_12: # %polly.loop_exit39
|
||||
# in Loop: Header=BB2_11 Depth=4
|
||||
addq $6144, %r10 # imm = 0x1800
|
||||
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
|
||||
leaq 1(%r11), %r11
|
||||
jle .LBB2_11
|
||||
.align 16, 0x90
|
||||
.LBB2_13: # %polly.loop_exit32
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
addq $393216, %rdx # imm = 0x60000
|
||||
cmpq $1472, %r9 # imm = 0x5C0
|
||||
leaq 64(%r9), %r9
|
||||
movq -56(%rbp), %rax # 8-byte Reload
|
||||
jl .LBB2_8
|
||||
# BB#14: # %polly.loop_exit25
|
||||
# in Loop: Header=BB2_15 Depth=2
|
||||
addq $256, -64(%rbp) # 8-byte Folded Spill
|
||||
# imm = 0x100
|
||||
movq -80(%rbp), %rcx # 8-byte Reload
|
||||
addq $256, %rcx # imm = 0x100
|
||||
addq $64, %r15
|
||||
cmpq $1472, %r12 # imm = 0x5C0
|
||||
leaq 64(%r12), %r12
|
||||
jl .LBB2_15
|
||||
# BB#6: # %polly.loop_exit18
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
movq -88(%rbp), %rbx # 8-byte Reload
|
||||
addq $393216, %rbx # imm = 0x60000
|
||||
cmpq $1472, %rax # imm = 0x5C0
|
||||
leaq 64(%rax), %rax
|
||||
jl .LBB2_5
|
||||
# BB#7: # %polly.loop_exit11
|
||||
xorl %eax, %eax
|
||||
addq $56, %rsp
|
||||
jne .LBB1_2
|
||||
# %bb.5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
callq fputc@PLT
|
||||
movq -48(%rbp), %rax # 8-byte Reload
|
||||
addq $1, %rax
|
||||
addq $6144, %r13 # imm = 0x1800
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# %bb.6: # %for.end12
|
||||
addq $8, %rsp
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp28:
|
||||
.size main, .Ltmp28-main
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end1:
|
||||
.size print_array, .Lfunc_end1-print_array
|
||||
.cfi_endproc
|
||||
|
||||
# -- End function
|
||||
.globl main # -- Begin function main
|
||||
.p2align 4, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
.cfi_startproc
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
subq $344, %rsp # imm = 0x158
|
||||
.cfi_offset %rbx, -56
|
||||
.cfi_offset %r12, -48
|
||||
.cfi_offset %r13, -40
|
||||
.cfi_offset %r14, -32
|
||||
.cfi_offset %r15, -24
|
||||
callq init_array
|
||||
leaq C(%rip), %rdi
|
||||
xorl %eax, %eax
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
callq memset@PLT
|
||||
movl $64, %eax
|
||||
movq %rax, -64(%rbp) # 8-byte Spill
|
||||
leaq A(%rip), %rax
|
||||
movq %rax, -56(%rbp) # 8-byte Spill
|
||||
.p2align 4, 0x90
|
||||
.LBB2_1: # %polly.loop_header8
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
# Child Loop BB2_3 Depth 3
|
||||
# Child Loop BB2_4 Depth 4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
leaq B+240(%rip), %rax
|
||||
xorl %edi, %edi
|
||||
.p2align 4, 0x90
|
||||
.LBB2_2: # %polly.loop_header14
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_3 Depth 3
|
||||
# Child Loop BB2_4 Depth 4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
movq %rdi, %rcx
|
||||
orq $4, %rcx
|
||||
movq %rcx, -80(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $8, %rcx
|
||||
movq %rcx, -264(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $12, %rcx
|
||||
movq %rcx, -256(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $16, %rcx
|
||||
movq %rcx, -248(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $20, %rcx
|
||||
movq %rcx, -240(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $24, %rcx
|
||||
movq %rcx, -232(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $28, %rcx
|
||||
movq %rcx, -224(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $32, %rcx
|
||||
movq %rcx, -216(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $36, %rcx
|
||||
movq %rcx, -208(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $40, %rcx
|
||||
movq %rcx, -200(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $44, %rcx
|
||||
movq %rcx, -192(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $48, %rcx
|
||||
movq %rcx, -184(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $52, %rcx
|
||||
movq %rcx, -176(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $56, %rcx
|
||||
movq %rcx, -168(%rbp) # 8-byte Spill
|
||||
movq %rdi, %rcx
|
||||
orq $60, %rcx
|
||||
movq %rcx, -160(%rbp) # 8-byte Spill
|
||||
movq -56(%rbp), %rdx # 8-byte Reload
|
||||
movq %rax, -136(%rbp) # 8-byte Spill
|
||||
movq %rax, -72(%rbp) # 8-byte Spill
|
||||
xorl %eax, %eax
|
||||
movq %rdi, -272(%rbp) # 8-byte Spill
|
||||
.p2align 4, 0x90
|
||||
.LBB2_3: # %polly.loop_header20
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# => This Loop Header: Depth=3
|
||||
# Child Loop BB2_4 Depth 4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
movq %rax, -144(%rbp) # 8-byte Spill
|
||||
movq %rdx, -152(%rbp) # 8-byte Spill
|
||||
movq -48(%rbp), %rax # 8-byte Reload
|
||||
.p2align 4, 0x90
|
||||
.LBB2_4: # %polly.loop_header26
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# Parent Loop BB2_3 Depth=3
|
||||
# => This Loop Header: Depth=4
|
||||
# Child Loop BB2_5 Depth 5
|
||||
movq %rax, -376(%rbp) # 8-byte Spill
|
||||
leaq (%rax,%rax,2), %rax
|
||||
shlq $11, %rax
|
||||
leaq C(%rip), %rsi
|
||||
addq %rsi, %rax
|
||||
leaq (%rax,%rdi,4), %rcx
|
||||
movq %rcx, -368(%rbp) # 8-byte Spill
|
||||
movq -80(%rbp), %rcx # 8-byte Reload
|
||||
leaq (%rax,%rcx,4), %rcx
|
||||
movq %rcx, -360(%rbp) # 8-byte Spill
|
||||
movq -264(%rbp), %rbx # 8-byte Reload
|
||||
leaq (%rax,%rbx,4), %rcx
|
||||
movq %rcx, -352(%rbp) # 8-byte Spill
|
||||
movq -256(%rbp), %r8 # 8-byte Reload
|
||||
movq %rdi, %rsi
|
||||
leaq (%rax,%r8,4), %rdi
|
||||
movq %rdi, -344(%rbp) # 8-byte Spill
|
||||
movq -248(%rbp), %rdi # 8-byte Reload
|
||||
leaq (%rax,%rdi,4), %rcx
|
||||
movq %rcx, -336(%rbp) # 8-byte Spill
|
||||
movq -240(%rbp), %r9 # 8-byte Reload
|
||||
leaq (%rax,%r9,4), %rcx
|
||||
movq %rcx, -328(%rbp) # 8-byte Spill
|
||||
movq -232(%rbp), %r10 # 8-byte Reload
|
||||
leaq (%rax,%r10,4), %rcx
|
||||
movq %rcx, -320(%rbp) # 8-byte Spill
|
||||
movq -224(%rbp), %r14 # 8-byte Reload
|
||||
leaq (%rax,%r14,4), %rcx
|
||||
movq %rcx, -312(%rbp) # 8-byte Spill
|
||||
movq -216(%rbp), %r15 # 8-byte Reload
|
||||
leaq (%rax,%r15,4), %rcx
|
||||
movq %rcx, -304(%rbp) # 8-byte Spill
|
||||
movq -208(%rbp), %r12 # 8-byte Reload
|
||||
leaq (%rax,%r12,4), %rcx
|
||||
movq %rcx, -296(%rbp) # 8-byte Spill
|
||||
movq -200(%rbp), %r13 # 8-byte Reload
|
||||
leaq (%rax,%r13,4), %rcx
|
||||
movq %rcx, -288(%rbp) # 8-byte Spill
|
||||
movq -192(%rbp), %r11 # 8-byte Reload
|
||||
leaq (%rax,%r11,4), %rcx
|
||||
movq %rcx, -280(%rbp) # 8-byte Spill
|
||||
movaps (%rax,%rsi,4), %xmm15
|
||||
movq -80(%rbp), %rcx # 8-byte Reload
|
||||
movaps (%rax,%rcx,4), %xmm14
|
||||
movaps (%rax,%rbx,4), %xmm13
|
||||
movaps (%rax,%r8,4), %xmm12
|
||||
movaps (%rax,%rdi,4), %xmm11
|
||||
movaps (%rax,%r9,4), %xmm10
|
||||
movaps (%rax,%r10,4), %xmm9
|
||||
movaps (%rax,%r14,4), %xmm8
|
||||
movaps (%rax,%r15,4), %xmm7
|
||||
movaps (%rax,%r12,4), %xmm6
|
||||
movaps (%rax,%r13,4), %xmm5
|
||||
movaps (%rax,%r11,4), %xmm4
|
||||
movq -184(%rbp), %rcx # 8-byte Reload
|
||||
movaps (%rax,%rcx,4), %xmm3
|
||||
movq -176(%rbp), %rsi # 8-byte Reload
|
||||
movaps (%rax,%rsi,4), %xmm0
|
||||
movaps %xmm0, -96(%rbp) # 16-byte Spill
|
||||
movq -168(%rbp), %rbx # 8-byte Reload
|
||||
movaps (%rax,%rbx,4), %xmm0
|
||||
movaps %xmm0, -112(%rbp) # 16-byte Spill
|
||||
movq -160(%rbp), %rdi # 8-byte Reload
|
||||
movaps (%rax,%rdi,4), %xmm0
|
||||
movaps %xmm0, -128(%rbp) # 16-byte Spill
|
||||
leaq (%rax,%rcx,4), %r8
|
||||
leaq (%rax,%rsi,4), %rcx
|
||||
leaq (%rax,%rbx,4), %rsi
|
||||
leaq (%rax,%rdi,4), %rax
|
||||
movq -72(%rbp), %r9 # 8-byte Reload
|
||||
movl $0, %r10d
|
||||
.p2align 4, 0x90
|
||||
.LBB2_5: # %vector.ph
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# Parent Loop BB2_3 Depth=3
|
||||
# Parent Loop BB2_4 Depth=4
|
||||
# => This Inner Loop Header: Depth=5
|
||||
movss (%rdx,%r10,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
|
||||
movaps -240(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm15
|
||||
movaps -224(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm14
|
||||
movaps -208(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm13
|
||||
movaps -192(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm12
|
||||
movaps -176(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm11
|
||||
movaps -160(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm10
|
||||
movaps -144(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm9
|
||||
movaps -128(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm8
|
||||
movaps -112(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm7
|
||||
movaps -96(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm6
|
||||
movaps -80(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm5
|
||||
movaps -64(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm4
|
||||
movaps -48(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
addps %xmm1, %xmm3
|
||||
movaps -32(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
movaps -96(%rbp), %xmm2 # 16-byte Reload
|
||||
addps %xmm1, %xmm2
|
||||
movaps %xmm2, -96(%rbp) # 16-byte Spill
|
||||
movaps -16(%r9), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
movaps -112(%rbp), %xmm2 # 16-byte Reload
|
||||
addps %xmm1, %xmm2
|
||||
movaps %xmm2, -112(%rbp) # 16-byte Spill
|
||||
mulps (%r9), %xmm0
|
||||
movaps -128(%rbp), %xmm1 # 16-byte Reload
|
||||
addps %xmm0, %xmm1
|
||||
movaps %xmm1, -128(%rbp) # 16-byte Spill
|
||||
addq $1, %r10
|
||||
addq $6144, %r9 # imm = 0x1800
|
||||
cmpq $64, %r10
|
||||
jne .LBB2_5
|
||||
# %bb.6: # %polly.loop_exit34
|
||||
# in Loop: Header=BB2_4 Depth=4
|
||||
movq -368(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm15, (%rdi)
|
||||
movq -360(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm14, (%rdi)
|
||||
movq -352(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm13, (%rdi)
|
||||
movq -344(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm12, (%rdi)
|
||||
movq -336(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm11, (%rdi)
|
||||
movq -328(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm10, (%rdi)
|
||||
movq -320(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm9, (%rdi)
|
||||
movq -312(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm8, (%rdi)
|
||||
movq -304(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm7, (%rdi)
|
||||
movq -296(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm6, (%rdi)
|
||||
movq -288(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm5, (%rdi)
|
||||
movq -280(%rbp), %rdi # 8-byte Reload
|
||||
movaps %xmm4, (%rdi)
|
||||
movaps %xmm3, (%r8)
|
||||
movaps -96(%rbp), %xmm0 # 16-byte Reload
|
||||
movaps %xmm0, (%rcx)
|
||||
movaps -112(%rbp), %xmm0 # 16-byte Reload
|
||||
movaps %xmm0, (%rsi)
|
||||
movaps -128(%rbp), %xmm0 # 16-byte Reload
|
||||
movaps %xmm0, (%rax)
|
||||
movq -376(%rbp), %rax # 8-byte Reload
|
||||
addq $1, %rax
|
||||
addq $6144, %rdx # imm = 0x1800
|
||||
cmpq -64(%rbp), %rax # 8-byte Folded Reload
|
||||
movq -272(%rbp), %rdi # 8-byte Reload
|
||||
jne .LBB2_4
|
||||
# %bb.7: # %polly.loop_exit28
|
||||
# in Loop: Header=BB2_3 Depth=3
|
||||
movq -144(%rbp), %rax # 8-byte Reload
|
||||
addq $64, %rax
|
||||
addq $393216, -72(%rbp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
movq -152(%rbp), %rdx # 8-byte Reload
|
||||
addq $256, %rdx # imm = 0x100
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jb .LBB2_3
|
||||
# %bb.8: # %polly.loop_exit22
|
||||
# in Loop: Header=BB2_2 Depth=2
|
||||
addq $64, %rdi
|
||||
movq -136(%rbp), %rax # 8-byte Reload
|
||||
addq $256, %rax # imm = 0x100
|
||||
cmpq $1536, %rdi # imm = 0x600
|
||||
jb .LBB2_2
|
||||
# %bb.9: # %polly.loop_exit16
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
movq -48(%rbp), %rax # 8-byte Reload
|
||||
movq %rax, %rcx
|
||||
addq $64, %rcx
|
||||
addq $64, -64(%rbp) # 8-byte Folded Spill
|
||||
addq $393216, -56(%rbp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
movq %rcx, %rax
|
||||
movq %rcx, -48(%rbp) # 8-byte Spill
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
jb .LBB2_1
|
||||
# %bb.10: # %polly.exiting
|
||||
xorl %eax, %eax
|
||||
addq $344, %rsp # imm = 0x158
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end2:
|
||||
.size main, .Lfunc_end2-main
|
||||
.cfi_endproc
|
||||
# -- End function
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
.type B,@object # @B
|
||||
@ -381,10 +497,11 @@ main: # @main
|
||||
.type .L.str,@object # @.str
|
||||
.section .rodata.str1.1,"aMS",@progbits,1
|
||||
.L.str:
|
||||
.asciz "%lf "
|
||||
.asciz "%lf "
|
||||
.size .L.str, 5
|
||||
|
||||
.type C,@object # @C
|
||||
.comm C,9437184,16
|
||||
|
||||
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
|
||||
.section ".note.GNU-stack","",@progbits
|
||||
|
@ -1,275 +1,248 @@
|
||||
.file "matmul.polly.interchanged.ll"
|
||||
.text
|
||||
.file "matmul.c"
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.p2align 3 # -- Begin function init_array
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.p2align 4, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
leaq B(%rip), %rax
|
||||
leaq A(%rip), %rcx
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB0_1: # %polly.loop_preheader3
|
||||
movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
|
||||
xorl %r9d, %r9d
|
||||
.p2align 4, 0x90
|
||||
.LBB0_1: # %polly.loop_header
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB0_2: # %polly.loop_header2
|
||||
movl $1, %edi
|
||||
xorl %edx, %edx
|
||||
.p2align 4, 0x90
|
||||
.LBB0_2: # %polly.loop_header1
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
andl $1022, %esi # imm = 0x3FE
|
||||
orl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, -4(%rcx,%rdi,4)
|
||||
movss %xmm1, -4(%rax,%rdi,4)
|
||||
leal (%r9,%rdx), %esi
|
||||
andl $1023, %esi # imm = 0x3FF
|
||||
addl $1, %esi
|
||||
xorps %xmm1, %xmm1
|
||||
cvtsi2sdl %esi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, (%rcx,%rdi,4)
|
||||
movss %xmm1, (%rax,%rdi,4)
|
||||
addq $2, %rdi
|
||||
addl %r8d, %edx
|
||||
cmpq $1537, %rdi # imm = 0x601
|
||||
jne .LBB0_2
|
||||
# BB#3: # %polly.loop_exit4
|
||||
# %bb.3: # %polly.loop_exit3
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
addq $1, %r9
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
addq $6144, %rcx # imm = 0x1800
|
||||
addl $2, %r8d
|
||||
cmpq $1536, %r9 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4: # %polly.loop_exit
|
||||
# %bb.4: # %polly.exiting
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end0:
|
||||
.size init_array, .Lfunc_end0-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
# -- End function
|
||||
.globl print_array # -- Begin function print_array
|
||||
.p2align 4, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
pushq %rax
|
||||
.cfi_offset %rbx, -56
|
||||
.cfi_offset %r12, -48
|
||||
.cfi_offset %r13, -40
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
leaq C(%rip), %r13
|
||||
xorl %eax, %eax
|
||||
movl $3435973837, %r12d # imm = 0xCCCCCCCD
|
||||
leaq .L.str(%rip), %r14
|
||||
.p2align 4, 0x90
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
movq stdout(%rip), %rsi
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.p2align 4, 0x90
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movl %ebx, %eax
|
||||
imulq %r12, %rax
|
||||
shrq $38, %rax
|
||||
leal (%rax,%rax,4), %r15d
|
||||
shll $4, %r15d
|
||||
addl $79, %r15d
|
||||
movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
movb $1, %al
|
||||
movq %rsi, %rdi
|
||||
movq %r14, %rsi
|
||||
callq fprintf
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
sarq $37, %rcx
|
||||
addl %edx, %ecx
|
||||
imull $80, %ecx, %ecx
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
cmpl %ebx, %r15d
|
||||
jne .LBB1_4
|
||||
# BB#3: # %if.then
|
||||
# %bb.3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
callq fputc@PLT
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
addq $1, %rbx
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # %for.end
|
||||
# %bb.5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
callq fputc@PLT
|
||||
movq -48(%rbp), %rax # 8-byte Reload
|
||||
addq $1, %rax
|
||||
addq $6144, %r13 # imm = 0x1800
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6: # %for.end12
|
||||
# %bb.6: # %for.end12
|
||||
addq $8, %rsp
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end1:
|
||||
.size print_array, .Lfunc_end1-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
# -- End function
|
||||
.globl main # -- Begin function main
|
||||
.p2align 4, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
# %bb.0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp20:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp21:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp22:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r14
|
||||
pushq %rbx
|
||||
.Ltmp23:
|
||||
.cfi_offset %rbx, -32
|
||||
.Ltmp24:
|
||||
.cfi_offset %r14, -24
|
||||
xorl %ebx, %ebx
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %polly.loop_preheader3.i
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # %polly.loop_header2.i
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %ebx, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %rbx, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %polly.loop_exit4.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
incq %rbx
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4: # %polly.loop_preheader3.preheader
|
||||
movl $C, %r14d
|
||||
movl $C, %edi
|
||||
callq init_array
|
||||
leaq C(%rip), %rbx
|
||||
xorl %r14d, %r14d
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
callq memset
|
||||
xorl %eax, %eax
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %polly.loop_preheader17
|
||||
movq %rbx, %rdi
|
||||
callq memset@PLT
|
||||
leaq B(%rip), %rax
|
||||
leaq A(%rip), %rcx
|
||||
.p2align 4, 0x90
|
||||
.LBB2_1: # %polly.loop_header8
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_10 Depth 2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
movl $B, %ebx
|
||||
xorl %edx, %edx
|
||||
.align 16, 0x90
|
||||
.LBB2_10: # %polly.loop_preheader24
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
# Child Loop BB2_3 Depth 3
|
||||
movq %rax, %rdx
|
||||
xorl %esi, %esi
|
||||
.p2align 4, 0x90
|
||||
.LBB2_2: # %polly.loop_header14
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
leaq (%rax,%rax,2), %rcx
|
||||
shlq $11, %rcx
|
||||
vmovss A(%rcx,%rdx,4), %xmm0
|
||||
movl $1536, %esi # imm = 0x600
|
||||
movq %r14, %rdi
|
||||
movq %rbx, %rcx
|
||||
.align 16, 0x90
|
||||
.LBB2_8: # %polly.loop_header23
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_10 Depth=2
|
||||
# Child Loop BB2_3 Depth 3
|
||||
leaq (%r14,%r14,2), %rdi
|
||||
shlq $11, %rdi
|
||||
addq %rcx, %rdi
|
||||
movss (%rdi,%rsi,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
|
||||
shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
|
||||
movl $12, %edi
|
||||
.p2align 4, 0x90
|
||||
.LBB2_3: # %vector.body
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# Parent Loop BB2_2 Depth=2
|
||||
# => This Inner Loop Header: Depth=3
|
||||
vmulss (%rcx), %xmm0, %xmm1
|
||||
vaddss (%rdi), %xmm1, %xmm1
|
||||
vmovss %xmm1, (%rdi)
|
||||
addq $4, %rdi
|
||||
addq $4, %rcx
|
||||
decq %rsi
|
||||
jne .LBB2_8
|
||||
# BB#9: # %polly.loop_exit25
|
||||
# in Loop: Header=BB2_10 Depth=2
|
||||
movaps -48(%rdx,%rdi,4), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
movaps -32(%rdx,%rdi,4), %xmm2
|
||||
mulps %xmm0, %xmm2
|
||||
addps -48(%rbx,%rdi,4), %xmm1
|
||||
addps -32(%rbx,%rdi,4), %xmm2
|
||||
movaps %xmm1, -48(%rbx,%rdi,4)
|
||||
movaps %xmm2, -32(%rbx,%rdi,4)
|
||||
movaps -16(%rdx,%rdi,4), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
movaps (%rdx,%rdi,4), %xmm2
|
||||
mulps %xmm0, %xmm2
|
||||
addps -16(%rbx,%rdi,4), %xmm1
|
||||
addps (%rbx,%rdi,4), %xmm2
|
||||
movaps %xmm1, -16(%rbx,%rdi,4)
|
||||
movaps %xmm2, (%rbx,%rdi,4)
|
||||
addq $16, %rdi
|
||||
cmpq $1548, %rdi # imm = 0x60C
|
||||
jne .LBB2_3
|
||||
# %bb.4: # %polly.loop_exit22
|
||||
# in Loop: Header=BB2_2 Depth=2
|
||||
addq $1, %rsi
|
||||
addq $6144, %rdx # imm = 0x1800
|
||||
cmpq $1536, %rsi # imm = 0x600
|
||||
jne .LBB2_2
|
||||
# %bb.5: # %polly.loop_exit16
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
addq $1, %r14
|
||||
addq $6144, %rbx # imm = 0x1800
|
||||
incq %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
jne .LBB2_10
|
||||
# BB#6: # %polly.loop_exit18
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
addq $6144, %r14 # imm = 0x1800
|
||||
incq %rax
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jne .LBB2_5
|
||||
# BB#7: # %polly.loop_exit11
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# %bb.6: # %polly.exiting
|
||||
xorl %eax, %eax
|
||||
popq %rbx
|
||||
popq %r14
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp25:
|
||||
.size main, .Ltmp25-main
|
||||
.cfi_def_cfa %rsp, 8
|
||||
retq
|
||||
.Lfunc_end2:
|
||||
.size main, .Lfunc_end2-main
|
||||
.cfi_endproc
|
||||
|
||||
# -- End function
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
.type B,@object # @B
|
||||
@ -277,10 +250,11 @@ main: # @main
|
||||
.type .L.str,@object # @.str
|
||||
.section .rodata.str1.1,"aMS",@progbits,1
|
||||
.L.str:
|
||||
.asciz "%lf "
|
||||
.asciz "%lf "
|
||||
.size .L.str, 5
|
||||
|
||||
.type C,@object # @C
|
||||
.comm C,9437184,16
|
||||
|
||||
.ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
|
||||
.section ".note.GNU-stack","",@progbits
|
||||
|
@ -1,4 +1,4 @@
|
||||
; ModuleID = 'matmul.s'
|
||||
; ModuleID = 'matmul.ll'
|
||||
source_filename = "matmul.c"
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
@ -6,15 +6,15 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||||
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
|
||||
|
||||
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@stdout = external global %struct._IO_FILE*, align 8
|
||||
@A = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@B = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@stdout = external dso_local global %struct._IO_FILE*, align 8
|
||||
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
|
||||
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@C = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @init_array() #0 {
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local void @init_array() #0 {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
@ -22,44 +22,37 @@ entry.split: ; preds = %entry
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %entry.split, %for.inc17
|
||||
%indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]
|
||||
%indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.inc17 ]
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.cond1.preheader, %for.body3
|
||||
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
||||
%0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
|
||||
%0 = mul nuw nsw i64 %indvars.iv, %indvars.iv4
|
||||
%1 = trunc i64 %0 to i32
|
||||
%rem = srem i32 %1, 1024
|
||||
%add = add nsw i32 %rem, 1
|
||||
%rem = and i32 %1, 1023
|
||||
%add = add nuw nsw i32 %rem, 1
|
||||
%conv = sitofp i32 %add to double
|
||||
%div = fmul double %conv, 5.000000e-01
|
||||
%conv4 = fptrunc double %div to float
|
||||
%arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv
|
||||
%arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv4, i64 %indvars.iv
|
||||
store float %conv4, float* %arrayidx6, align 4
|
||||
%2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
|
||||
%3 = trunc i64 %2 to i32
|
||||
%rem8 = srem i32 %3, 1024
|
||||
%add9 = add nsw i32 %rem8, 1
|
||||
%conv10 = sitofp i32 %add9 to double
|
||||
%div11 = fmul double %conv10, 5.000000e-01
|
||||
%conv12 = fptrunc double %div11 to float
|
||||
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv
|
||||
store float %conv12, float* %arrayidx16, align 4
|
||||
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv
|
||||
store float %conv4, float* %arrayidx16, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 1536
|
||||
br i1 %exitcond, label %for.body3, label %for.inc17
|
||||
|
||||
for.inc17: ; preds = %for.body3
|
||||
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
|
||||
%exitcond7 = icmp ne i64 %indvars.iv.next6, 1536
|
||||
br i1 %exitcond7, label %for.cond1.preheader, label %for.end19
|
||||
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
|
||||
%exitcond6 = icmp ne i64 %indvars.iv.next5, 1536
|
||||
br i1 %exitcond6, label %for.cond1.preheader, label %for.end19
|
||||
|
||||
for.end19: ; preds = %for.inc17
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @print_array() #0 {
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local void @print_array() #0 {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
@ -79,7 +72,7 @@ for.body3: ; preds = %for.cond1.preheader
|
||||
%conv = fpext float %2 to double
|
||||
%call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2
|
||||
%3 = trunc i64 %indvars.iv to i32
|
||||
%rem = srem i32 %3, 80
|
||||
%rem = urem i32 %3, 80
|
||||
%cmp6 = icmp eq i32 %rem, 79
|
||||
br i1 %cmp6, label %if.then, label %for.inc
|
||||
|
||||
@ -105,10 +98,10 @@ for.end12: ; preds = %for.end
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
|
||||
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @main() #0 {
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local i32 @main() #0 {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
@ -128,16 +121,14 @@ for.body3: ; preds = %for.cond1.preheader
|
||||
|
||||
for.body8: ; preds = %for.body3, %for.body8
|
||||
%indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
|
||||
%arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
|
||||
%0 = load float, float* %arrayidx12, align 4
|
||||
%0 = load float, float* %arrayidx5, align 4
|
||||
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv
|
||||
%1 = load float, float* %arrayidx16, align 4
|
||||
%arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4
|
||||
%2 = load float, float* %arrayidx20, align 4
|
||||
%mul = fmul float %1, %2
|
||||
%add = fadd float %0, %mul
|
||||
%arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
|
||||
store float %add, float* %arrayidx24, align 4
|
||||
store float %add, float* %arrayidx5, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 1536
|
||||
br i1 %exitcond, label %for.body8, label %for.inc25
|
||||
@ -162,10 +153,12 @@ declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #2
|
||||
; Function Attrs: nounwind
|
||||
declare i32 @fputc(i32, %struct._IO_FILE* nocapture) #2
|
||||
|
||||
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.ident = !{!1}
|
||||
|
||||
!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
|
||||
|
@ -1,85 +1,83 @@
|
||||
#!/bin/sh -a
|
||||
|
||||
echo "--> 1. Create LLVM-IR from C"
|
||||
clang -S -emit-llvm matmul.c -o matmul.s
|
||||
clang -S -emit-llvm matmul.c -Xclang -disable-O0-optnone -o matmul.ll
|
||||
|
||||
echo "--> 2. Prepare the LLVM-IR for Polly"
|
||||
opt -S -polly-canonicalize matmul.s > matmul.preopt.ll
|
||||
opt -S -polly-canonicalize matmul.ll -o matmul.preopt.ll
|
||||
|
||||
echo "--> 3. Show the SCoPs detected by Polly"
|
||||
opt -basicaa -polly-ast -analyze -q matmul.preopt.ll \
|
||||
-polly-process-unprofitable
|
||||
opt -basicaa -polly-ast -analyze matmul.preopt.ll \
|
||||
-polly-process-unprofitable -polly-use-llvm-names
|
||||
|
||||
echo "--> 4.1 Highlight the detected SCoPs in the CFGs of the program"
|
||||
# We only create .dot files, as directly -view-scops directly calls graphviz
|
||||
# which would require user interaction to continue the script.
|
||||
# opt -basicaa -view-scops -disable-output matmul.preopt.ll
|
||||
opt -basicaa -dot-scops -disable-output matmul.preopt.ll
|
||||
opt -basicaa -dot-scops -disable-output matmul.preopt.ll -polly-use-llvm-names
|
||||
|
||||
echo "--> 4.2 Highlight the detected SCoPs in the CFGs of the program (print \
|
||||
no instructions)"
|
||||
# We only create .dot files, as directly -view-scops-only directly calls
|
||||
# graphviz which would require user interaction to continue the script.
|
||||
# opt -basicaa -view-scops-only -disable-output matmul.preopt.ll
|
||||
opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll
|
||||
opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll -polly-use-llvm-names
|
||||
|
||||
echo "--> 4.3 Create .png files from the .dot files"
|
||||
for i in `ls *.dot`; do dot -Tpng $i > $i.png; done
|
||||
|
||||
echo "--> 5. View the polyhedral representation of the SCoPs"
|
||||
opt -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
|
||||
opt -basicaa -polly-scops -analyze matmul.preopt.ll \
|
||||
-polly-process-unprofitable -polly-use-llvm-names
|
||||
|
||||
echo "--> 6. Show the dependences for the SCoPs"
|
||||
opt -basicaa -polly-dependences -analyze matmul.preopt.ll \
|
||||
-polly-process-unprofitable
|
||||
-polly-process-unprofitable -polly-use-llvm-names
|
||||
|
||||
echo "--> 7. Export jscop files"
|
||||
opt -basicaa -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
|
||||
opt -basicaa -polly-export-jscop matmul.preopt.ll \
|
||||
-polly-process-unprofitable -disable-output -polly-use-llvm-names
|
||||
|
||||
echo "--> 8. Import the updated jscop files and print the new SCoPs. (optional)"
|
||||
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
|
||||
-polly-process-unprofitable
|
||||
-polly-process-unprofitable -polly-use-llvm-names
|
||||
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
|
||||
-polly-import-jscop-postfix=interchanged -polly-process-unprofitable
|
||||
-polly-import-jscop-postfix=interchanged -polly-process-unprofitable -polly-use-llvm-names
|
||||
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
|
||||
-polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable
|
||||
-polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable -polly-use-llvm-names
|
||||
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
|
||||
-polly-import-jscop-postfix=interchanged+tiled+vector \
|
||||
-polly-process-unprofitable
|
||||
-polly-process-unprofitable -polly-use-llvm-names
|
||||
|
||||
echo "--> 9. Codegenerate the SCoPs"
|
||||
opt -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \
|
||||
-polly-codegen -polly-process-unprofitable\
|
||||
matmul.preopt.ll | opt -O3 > matmul.polly.interchanged.ll
|
||||
opt -basicaa -polly-import-jscop \
|
||||
opt -S -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \
|
||||
-polly-codegen -polly-process-unprofitable -polly-use-llvm-names \
|
||||
matmul.preopt.ll | opt -O3 -S -o matmul.polly.interchanged.ll
|
||||
opt -S -basicaa -polly-import-jscop \
|
||||
-polly-import-jscop-postfix=interchanged+tiled -polly-codegen \
|
||||
matmul.preopt.ll -polly-process-unprofitable \
|
||||
| opt -O3 > matmul.polly.interchanged+tiled.ll
|
||||
opt -basicaa -polly-import-jscop -polly-process-unprofitable\
|
||||
matmul.preopt.ll -polly-process-unprofitable -polly-use-llvm-names \
|
||||
| opt -O3 -S -o matmul.polly.interchanged+tiled.ll
|
||||
opt -S -basicaa -polly-import-jscop -polly-process-unprofitable\
|
||||
-polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
|
||||
matmul.preopt.ll -polly-vectorizer=polly\
|
||||
| opt -O3 > matmul.polly.interchanged+tiled+vector.ll
|
||||
opt -basicaa -polly-import-jscop -polly-process-unprofitable\
|
||||
matmul.preopt.ll -polly-vectorizer=polly -polly-use-llvm-names \
|
||||
| opt -O3 -S -o matmul.polly.interchanged+tiled+vector.ll
|
||||
opt -S -basicaa -polly-import-jscop -polly-process-unprofitable\
|
||||
-polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
|
||||
matmul.preopt.ll -polly-vectorizer=polly -polly-parallel\
|
||||
| opt -O3 > matmul.polly.interchanged+tiled+vector+openmp.ll
|
||||
opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll
|
||||
matmul.preopt.ll -polly-vectorizer=polly -polly-parallel -polly-use-llvm-names \
|
||||
| opt -O3 -S -o matmul.polly.interchanged+tiled+vector+openmp.ll
|
||||
opt -S matmul.preopt.ll | opt -O3 -S -o matmul.normalopt.ll
|
||||
|
||||
echo "--> 10. Create the executables"
|
||||
llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s && gcc matmul.polly.interchanged.s \
|
||||
-o matmul.polly.interchanged.exe
|
||||
llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s && gcc matmul.polly.interchanged+tiled.s \
|
||||
-o matmul.polly.interchanged+tiled.exe
|
||||
llc matmul.polly.interchanged+tiled+vector.ll \
|
||||
-o matmul.polly.interchanged+tiled+vector.s \
|
||||
&& gcc matmul.polly.interchanged+tiled+vector.s \
|
||||
-o matmul.polly.interchanged+tiled+vector.exe
|
||||
llc matmul.polly.interchanged+tiled+vector+openmp.ll \
|
||||
-o matmul.polly.interchanged+tiled+vector+openmp.s \
|
||||
&& gcc -lgomp matmul.polly.interchanged+tiled+vector+openmp.s \
|
||||
-o matmul.polly.interchanged+tiled+vector+openmp.exe
|
||||
llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s \
|
||||
-o matmul.normalopt.exe
|
||||
llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s -relocation-model=pic
|
||||
gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
|
||||
llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s -relocation-model=pic
|
||||
gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
|
||||
llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s -relocation-model=pic
|
||||
gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
|
||||
llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s -relocation-model=pic
|
||||
gcc matmul.polly.interchanged+tiled+vector+openmp.s -lgomp -o matmul.polly.interchanged+tiled+vector+openmp.exe
|
||||
llc matmul.normalopt.ll -o matmul.normalopt.s -relocation-model=pic
|
||||
gcc matmul.normalopt.s -lgomp -o matmul.normalopt.exe
|
||||
|
||||
echo "--> 11. Compare the runtime of the executables"
|
||||
|
||||
|
@ -1,39 +1,39 @@
|
||||
digraph "Scop Graph for 'init_array' function" {
|
||||
label="Scop Graph for 'init_array' function";
|
||||
|
||||
Node0x5b5b5a0 [shape=record,label="{entry:\l br label %entry.split\l}"];
|
||||
Node0x5b5b5a0 -> Node0x5b5de30;
|
||||
Node0x5b5de30 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
|
||||
Node0x5b5de30 -> Node0x5b5de50;
|
||||
Node0x5b5de50 [shape=record,label="{for.cond1.preheader: \l %indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]\l br label %for.body3\l}"];
|
||||
Node0x5b5de50 -> Node0x5b5b570;
|
||||
Node0x5b5b570 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %1 = trunc i64 %0 to i32\l %rem = srem i32 %1, 1024\l %add = add nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %3 = trunc i64 %2 to i32\l %rem8 = srem i32 %3, 1024\l %add9 = add nsw i32 %rem8, 1\l %conv10 = sitofp i32 %add9 to double\l %div11 = fmul double %conv10, 5.000000e-01\l %conv12 = fptrunc double %div11 to float\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv12, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"];
|
||||
Node0x5b5b570 -> Node0x5b5b570[constraint=false];
|
||||
Node0x5b5b570 -> Node0x5b5df30;
|
||||
Node0x5b5df30 [shape=record,label="{for.inc17: \l %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1\l %exitcond7 = icmp ne i64 %indvars.iv.next6, 1536\l br i1 %exitcond7, label %for.cond1.preheader, label %for.end19\l}"];
|
||||
Node0x5b5df30 -> Node0x5b5de50[constraint=false];
|
||||
Node0x5b5df30 -> Node0x5b5df90;
|
||||
Node0x5b5df90 [shape=record,label="{for.end19: \l ret void\l}"];
|
||||
Node0x7fffc6c46ea0 [shape=record,label="{entry:\l br label %entry.split\l}"];
|
||||
Node0x7fffc6c46ea0 -> Node0x7fffc6c46f20;
|
||||
Node0x7fffc6c46f20 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
|
||||
Node0x7fffc6c46f20 -> Node0x7fffc6c47000;
|
||||
Node0x7fffc6c47000 [shape=record,label="{for.cond1.preheader: \l %indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.inc17 ]\l br label %for.body3\l}"];
|
||||
Node0x7fffc6c47000 -> Node0x7fffc6c47290;
|
||||
Node0x7fffc6c47290 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv4\l %1 = trunc i64 %0 to i32\l %rem = and i32 %1, 1023\l %add = add nuw nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv4, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv\l store float %conv4, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"];
|
||||
Node0x7fffc6c47290 -> Node0x7fffc6c47290[constraint=false];
|
||||
Node0x7fffc6c47290 -> Node0x7fffc6c47b10;
|
||||
Node0x7fffc6c47b10 [shape=record,label="{for.inc17: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.cond1.preheader, label %for.end19\l}"];
|
||||
Node0x7fffc6c47b10 -> Node0x7fffc6c47000[constraint=false];
|
||||
Node0x7fffc6c47b10 -> Node0x7fffc6c48b10;
|
||||
Node0x7fffc6c48b10 [shape=record,label="{for.end19: \l ret void\l}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x5b4bdd0 {
|
||||
subgraph cluster_0x7fffc6c32540 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x5b4bf50 {
|
||||
subgraph cluster_0x7fffc6c32f30 {
|
||||
label = "Region can not profitably be optimized!";
|
||||
style = solid;
|
||||
color = 6
|
||||
subgraph cluster_0x5b4c0d0 {
|
||||
subgraph cluster_0x7fffc6c32690 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
Node0x5b5b570;
|
||||
Node0x7fffc6c47290;
|
||||
}
|
||||
Node0x5b5de50;
|
||||
Node0x5b5df30;
|
||||
Node0x7fffc6c47000;
|
||||
Node0x7fffc6c47b10;
|
||||
}
|
||||
Node0x5b5b5a0;
|
||||
Node0x5b5de30;
|
||||
Node0x5b5df90;
|
||||
Node0x7fffc6c46ea0;
|
||||
Node0x7fffc6c46f20;
|
||||
Node0x7fffc6c48b10;
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 151 KiB After Width: | Height: | Size: 123 KiB |
@ -1,50 +1,50 @@
|
||||
digraph "Scop Graph for 'main' function" {
|
||||
label="Scop Graph for 'main' function";
|
||||
|
||||
Node0x5b5c850 [shape=record,label="{entry:\l br label %entry.split\l}"];
|
||||
Node0x5b5c850 -> Node0x5b5a440;
|
||||
Node0x5b5a440 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"];
|
||||
Node0x5b5a440 -> Node0x5b38cd0;
|
||||
Node0x5b38cd0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"];
|
||||
Node0x5b38cd0 -> Node0x5b4bd30;
|
||||
Node0x5b4bd30 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"];
|
||||
Node0x5b4bd30 -> Node0x5b38c50;
|
||||
Node0x5b38c50 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l %0 = load float, float* %arrayidx12, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l %arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float %add, float* %arrayidx24, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"];
|
||||
Node0x5b38c50 -> Node0x5b38c50[constraint=false];
|
||||
Node0x5b38c50 -> Node0x5b5a290;
|
||||
Node0x5b5a290 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"];
|
||||
Node0x5b5a290 -> Node0x5b4bd30[constraint=false];
|
||||
Node0x5b5a290 -> Node0x5b5a340;
|
||||
Node0x5b5a340 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"];
|
||||
Node0x5b5a340 -> Node0x5b38cd0[constraint=false];
|
||||
Node0x5b5a340 -> Node0x5b5a3a0;
|
||||
Node0x5b5a3a0 [shape=record,label="{for.end30: \l ret i32 0\l}"];
|
||||
Node0x7fffc6c4cb90 [shape=record,label="{entry:\l br label %entry.split\l}"];
|
||||
Node0x7fffc6c4cb90 -> Node0x7fffc6c47b10;
|
||||
Node0x7fffc6c47b10 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"];
|
||||
Node0x7fffc6c47b10 -> Node0x7fffc6c456e0;
|
||||
Node0x7fffc6c456e0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"];
|
||||
Node0x7fffc6c456e0 -> Node0x7fffc6c3f080;
|
||||
Node0x7fffc6c3f080 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"];
|
||||
Node0x7fffc6c3f080 -> Node0x7fffc6c3f220;
|
||||
Node0x7fffc6c3f220 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %0 = load float, float* %arrayidx5, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l store float %add, float* %arrayidx5, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"];
|
||||
Node0x7fffc6c3f220 -> Node0x7fffc6c3f220[constraint=false];
|
||||
Node0x7fffc6c3f220 -> Node0x7fffc6c40480;
|
||||
Node0x7fffc6c40480 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"];
|
||||
Node0x7fffc6c40480 -> Node0x7fffc6c3f080[constraint=false];
|
||||
Node0x7fffc6c40480 -> Node0x7fffc6c404e0;
|
||||
Node0x7fffc6c404e0 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"];
|
||||
Node0x7fffc6c404e0 -> Node0x7fffc6c456e0[constraint=false];
|
||||
Node0x7fffc6c404e0 -> Node0x7fffc6c40540;
|
||||
Node0x7fffc6c40540 [shape=record,label="{for.end30: \l ret i32 0\l}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x5b5c970 {
|
||||
subgraph cluster_0x7fffc6c32540 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x5b5c5a0 {
|
||||
subgraph cluster_0x7fffc6c32f30 {
|
||||
label = "";
|
||||
style = filled;
|
||||
color = 3 subgraph cluster_0x5b5c9f0 {
|
||||
color = 3 subgraph cluster_0x7fffc6c32690 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x5b5c110 {
|
||||
subgraph cluster_0x7fffc6c32dc0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x5b38c50;
|
||||
Node0x7fffc6c3f220;
|
||||
}
|
||||
Node0x5b4bd30;
|
||||
Node0x5b5a290;
|
||||
Node0x7fffc6c3f080;
|
||||
Node0x7fffc6c40480;
|
||||
}
|
||||
Node0x5b38cd0;
|
||||
Node0x5b5a340;
|
||||
Node0x7fffc6c456e0;
|
||||
Node0x7fffc6c404e0;
|
||||
}
|
||||
Node0x5b5c850;
|
||||
Node0x5b5a440;
|
||||
Node0x5b5a3a0;
|
||||
Node0x7fffc6c4cb90;
|
||||
Node0x7fffc6c47b10;
|
||||
Node0x7fffc6c40540;
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 186 KiB After Width: | Height: | Size: 175 KiB |
@ -1,51 +1,51 @@
|
||||
digraph "Scop Graph for 'print_array' function" {
|
||||
label="Scop Graph for 'print_array' function";
|
||||
|
||||
Node0x5b5ee00 [shape=record,label="{entry:\l br label %entry.split\l}"];
|
||||
Node0x5b5ee00 -> Node0x5b5ee50;
|
||||
Node0x5b5ee50 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
|
||||
Node0x5b5ee50 -> Node0x5b5ee70;
|
||||
Node0x5b5ee70 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"];
|
||||
Node0x5b5ee70 -> Node0x5b5ee20;
|
||||
Node0x5b5ee20 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = srem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"];
|
||||
Node0x5b5ee20 -> Node0x5b60d10;
|
||||
Node0x5b5ee20 -> Node0x5b60d70;
|
||||
Node0x5b60d10 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"];
|
||||
Node0x5b60d10 -> Node0x5b60d70;
|
||||
Node0x5b60d70 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
|
||||
Node0x5b60d70 -> Node0x5b5ee20[constraint=false];
|
||||
Node0x5b60d70 -> Node0x5b60e10;
|
||||
Node0x5b60e10 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"];
|
||||
Node0x5b60e10 -> Node0x5b5ee70[constraint=false];
|
||||
Node0x5b60e10 -> Node0x5b60e70;
|
||||
Node0x5b60e70 [shape=record,label="{for.end12: \l ret void\l}"];
|
||||
Node0x7fffc6c42bf0 [shape=record,label="{entry:\l br label %entry.split\l}"];
|
||||
Node0x7fffc6c42bf0 -> Node0x7fffc6c42f10;
|
||||
Node0x7fffc6c42f10 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
|
||||
Node0x7fffc6c42f10 -> Node0x7fffc6c4abb0;
|
||||
Node0x7fffc6c4abb0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"];
|
||||
Node0x7fffc6c4abb0 -> Node0x7fffc6c4ac10;
|
||||
Node0x7fffc6c4ac10 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = urem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"];
|
||||
Node0x7fffc6c4ac10 -> Node0x7fffc6c4af80;
|
||||
Node0x7fffc6c4ac10 -> Node0x7fffc6c4afe0;
|
||||
Node0x7fffc6c4af80 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"];
|
||||
Node0x7fffc6c4af80 -> Node0x7fffc6c4afe0;
|
||||
Node0x7fffc6c4afe0 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
|
||||
Node0x7fffc6c4afe0 -> Node0x7fffc6c4ac10[constraint=false];
|
||||
Node0x7fffc6c4afe0 -> Node0x7fffc6c4b3b0;
|
||||
Node0x7fffc6c4b3b0 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"];
|
||||
Node0x7fffc6c4b3b0 -> Node0x7fffc6c4abb0[constraint=false];
|
||||
Node0x7fffc6c4b3b0 -> Node0x7fffc6c4b580;
|
||||
Node0x7fffc6c4b580 [shape=record,label="{for.end12: \l ret void\l}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x5b349a0 {
|
||||
subgraph cluster_0x7fffc6c32540 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x5b5c2c0 {
|
||||
subgraph cluster_0x7fffc6c32dc0 {
|
||||
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
|
||||
style = solid;
|
||||
color = 6
|
||||
subgraph cluster_0x5b5c240 {
|
||||
subgraph cluster_0x7fffc6c32690 {
|
||||
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x5b34a20 {
|
||||
subgraph cluster_0x7fffc6c32f30 {
|
||||
label = "Region can not profitably be optimized!";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x5b5ee20;
|
||||
Node0x5b60d10;
|
||||
Node0x7fffc6c4ac10;
|
||||
Node0x7fffc6c4af80;
|
||||
}
|
||||
Node0x5b60d70;
|
||||
Node0x7fffc6c4afe0;
|
||||
}
|
||||
Node0x5b5ee70;
|
||||
Node0x5b60e10;
|
||||
Node0x7fffc6c4abb0;
|
||||
Node0x7fffc6c4b3b0;
|
||||
}
|
||||
Node0x5b5ee00;
|
||||
Node0x5b5ee50;
|
||||
Node0x5b60e70;
|
||||
Node0x7fffc6c42bf0;
|
||||
Node0x7fffc6c42f10;
|
||||
Node0x7fffc6c4b580;
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 196 KiB After Width: | Height: | Size: 205 KiB |
@ -1,39 +1,39 @@
|
||||
digraph "Scop Graph for 'init_array' function" {
|
||||
label="Scop Graph for 'init_array' function";
|
||||
|
||||
Node0x5ae2570 [shape=record,label="{entry}"];
|
||||
Node0x5ae2570 -> Node0x5ae4e90;
|
||||
Node0x5ae4e90 [shape=record,label="{entry.split}"];
|
||||
Node0x5ae4e90 -> Node0x5ae4f50;
|
||||
Node0x5ae4f50 [shape=record,label="{for.cond1.preheader}"];
|
||||
Node0x5ae4f50 -> Node0x5ae50e0;
|
||||
Node0x5ae50e0 [shape=record,label="{for.body3}"];
|
||||
Node0x5ae50e0 -> Node0x5ae50e0[constraint=false];
|
||||
Node0x5ae50e0 -> Node0x5ae5100;
|
||||
Node0x5ae5100 [shape=record,label="{for.inc17}"];
|
||||
Node0x5ae5100 -> Node0x5ae4f50[constraint=false];
|
||||
Node0x5ae5100 -> Node0x5ae4ff0;
|
||||
Node0x5ae4ff0 [shape=record,label="{for.end19}"];
|
||||
Node0x7fffdb5cceb0 [shape=record,label="{entry}"];
|
||||
Node0x7fffdb5cceb0 -> Node0x7fffdb5ccf00;
|
||||
Node0x7fffdb5ccf00 [shape=record,label="{entry.split}"];
|
||||
Node0x7fffdb5ccf00 -> Node0x7fffdb5ccf80;
|
||||
Node0x7fffdb5ccf80 [shape=record,label="{for.cond1.preheader}"];
|
||||
Node0x7fffdb5ccf80 -> Node0x7fffdb5cd090;
|
||||
Node0x7fffdb5cd090 [shape=record,label="{for.body3}"];
|
||||
Node0x7fffdb5cd090 -> Node0x7fffdb5cd090[constraint=false];
|
||||
Node0x7fffdb5cd090 -> Node0x7fffdb5cd0b0;
|
||||
Node0x7fffdb5cd0b0 [shape=record,label="{for.inc17}"];
|
||||
Node0x7fffdb5cd0b0 -> Node0x7fffdb5ccf80[constraint=false];
|
||||
Node0x7fffdb5cd0b0 -> Node0x7fffdb5cd2a0;
|
||||
Node0x7fffdb5cd2a0 [shape=record,label="{for.end19}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x5ad2dd0 {
|
||||
subgraph cluster_0x7fffdb5b8530 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x5ad2f50 {
|
||||
subgraph cluster_0x7fffdb5b8f40 {
|
||||
label = "Region can not profitably be optimized!";
|
||||
style = solid;
|
||||
color = 6
|
||||
subgraph cluster_0x5ad30d0 {
|
||||
subgraph cluster_0x7fffdb5b86a0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
Node0x5ae50e0;
|
||||
Node0x7fffdb5cd090;
|
||||
}
|
||||
Node0x5ae4f50;
|
||||
Node0x5ae5100;
|
||||
Node0x7fffdb5ccf80;
|
||||
Node0x7fffdb5cd0b0;
|
||||
}
|
||||
Node0x5ae2570;
|
||||
Node0x5ae4e90;
|
||||
Node0x5ae4ff0;
|
||||
Node0x7fffdb5cceb0;
|
||||
Node0x7fffdb5ccf00;
|
||||
Node0x7fffdb5cd2a0;
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 26 KiB |
@ -1,50 +1,50 @@
|
||||
digraph "Scop Graph for 'main' function" {
|
||||
label="Scop Graph for 'main' function";
|
||||
|
||||
Node0x5abfcf0 [shape=record,label="{entry}"];
|
||||
Node0x5abfcf0 -> Node0x5ade060;
|
||||
Node0x5ade060 [shape=record,label="{entry.split}"];
|
||||
Node0x5ade060 -> Node0x5ade0e0;
|
||||
Node0x5ade0e0 [shape=record,label="{for.cond1.preheader}"];
|
||||
Node0x5ade0e0 -> Node0x5ade100;
|
||||
Node0x5ade100 [shape=record,label="{for.body3}"];
|
||||
Node0x5ade100 -> Node0x5ae0020;
|
||||
Node0x5ae0020 [shape=record,label="{for.body8}"];
|
||||
Node0x5ae0020 -> Node0x5ae0020[constraint=false];
|
||||
Node0x5ae0020 -> Node0x5ae0080;
|
||||
Node0x5ae0080 [shape=record,label="{for.inc25}"];
|
||||
Node0x5ae0080 -> Node0x5ade100[constraint=false];
|
||||
Node0x5ae0080 -> Node0x5adfef0;
|
||||
Node0x5adfef0 [shape=record,label="{for.inc28}"];
|
||||
Node0x5adfef0 -> Node0x5ade0e0[constraint=false];
|
||||
Node0x5adfef0 -> Node0x5adff50;
|
||||
Node0x5adff50 [shape=record,label="{for.end30}"];
|
||||
Node0x7fffdb5cbd10 [shape=record,label="{entry}"];
|
||||
Node0x7fffdb5cbd10 -> Node0x7fffdb5c7140;
|
||||
Node0x7fffdb5c7140 [shape=record,label="{entry.split}"];
|
||||
Node0x7fffdb5c7140 -> Node0x7fffdb5c7200;
|
||||
Node0x7fffdb5c7200 [shape=record,label="{for.cond1.preheader}"];
|
||||
Node0x7fffdb5c7200 -> Node0x7fffdb5ccd60;
|
||||
Node0x7fffdb5ccd60 [shape=record,label="{for.body3}"];
|
||||
Node0x7fffdb5ccd60 -> Node0x7fffdb5ccd80;
|
||||
Node0x7fffdb5ccd80 [shape=record,label="{for.body8}"];
|
||||
Node0x7fffdb5ccd80 -> Node0x7fffdb5ccd80[constraint=false];
|
||||
Node0x7fffdb5ccd80 -> Node0x7fffdb5cce20;
|
||||
Node0x7fffdb5cce20 [shape=record,label="{for.inc25}"];
|
||||
Node0x7fffdb5cce20 -> Node0x7fffdb5ccd60[constraint=false];
|
||||
Node0x7fffdb5cce20 -> Node0x7fffdb5cce80;
|
||||
Node0x7fffdb5cce80 [shape=record,label="{for.inc28}"];
|
||||
Node0x7fffdb5cce80 -> Node0x7fffdb5c7200[constraint=false];
|
||||
Node0x7fffdb5cce80 -> Node0x7fffdb5ccee0;
|
||||
Node0x7fffdb5ccee0 [shape=record,label="{for.end30}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x5ad2c80 {
|
||||
subgraph cluster_0x7fffdb5b8530 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x5ad2e50 {
|
||||
subgraph cluster_0x7fffdb5b8f40 {
|
||||
label = "";
|
||||
style = filled;
|
||||
color = 3 subgraph cluster_0x5ad2d00 {
|
||||
color = 3 subgraph cluster_0x7fffdb5b86a0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x5ad2dd0 {
|
||||
subgraph cluster_0x7fffdb5cc3c0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x5ae0020;
|
||||
Node0x7fffdb5ccd80;
|
||||
}
|
||||
Node0x5ade100;
|
||||
Node0x5ae0080;
|
||||
Node0x7fffdb5ccd60;
|
||||
Node0x7fffdb5cce20;
|
||||
}
|
||||
Node0x5ade0e0;
|
||||
Node0x5adfef0;
|
||||
Node0x7fffdb5c7200;
|
||||
Node0x7fffdb5cce80;
|
||||
}
|
||||
Node0x5abfcf0;
|
||||
Node0x5ade060;
|
||||
Node0x5adff50;
|
||||
Node0x7fffdb5cbd10;
|
||||
Node0x7fffdb5c7140;
|
||||
Node0x7fffdb5ccee0;
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 34 KiB |
@ -1,51 +1,51 @@
|
||||
digraph "Scop Graph for 'print_array' function" {
|
||||
label="Scop Graph for 'print_array' function";
|
||||
|
||||
Node0x5ae5e30 [shape=record,label="{entry}"];
|
||||
Node0x5ae5e30 -> Node0x5ae5f50;
|
||||
Node0x5ae5f50 [shape=record,label="{entry.split}"];
|
||||
Node0x5ae5f50 -> Node0x5ae7d90;
|
||||
Node0x5ae7d90 [shape=record,label="{for.cond1.preheader}"];
|
||||
Node0x5ae7d90 -> Node0x5ae7f20;
|
||||
Node0x5ae7f20 [shape=record,label="{for.body3}"];
|
||||
Node0x5ae7f20 -> Node0x5ae7f40;
|
||||
Node0x5ae7f20 -> Node0x5ae7f60;
|
||||
Node0x5ae7f40 [shape=record,label="{if.then}"];
|
||||
Node0x5ae7f40 -> Node0x5ae7f60;
|
||||
Node0x5ae7f60 [shape=record,label="{for.inc}"];
|
||||
Node0x5ae7f60 -> Node0x5ae7f20[constraint=false];
|
||||
Node0x5ae7f60 -> Node0x5ae7e30;
|
||||
Node0x5ae7e30 [shape=record,label="{for.end}"];
|
||||
Node0x5ae7e30 -> Node0x5ae7d90[constraint=false];
|
||||
Node0x5ae7e30 -> Node0x5ae8110;
|
||||
Node0x5ae8110 [shape=record,label="{for.end12}"];
|
||||
Node0x7fffdb5c9180 [shape=record,label="{entry}"];
|
||||
Node0x7fffdb5c9180 -> Node0x7fffdb5b7940;
|
||||
Node0x7fffdb5b7940 [shape=record,label="{entry.split}"];
|
||||
Node0x7fffdb5b7940 -> Node0x7fffdb5b7960;
|
||||
Node0x7fffdb5b7960 [shape=record,label="{for.cond1.preheader}"];
|
||||
Node0x7fffdb5b7960 -> Node0x7fffdb5b79c0;
|
||||
Node0x7fffdb5b79c0 [shape=record,label="{for.body3}"];
|
||||
Node0x7fffdb5b79c0 -> Node0x7fffdb5b79e0;
|
||||
Node0x7fffdb5b79c0 -> Node0x7fffdb5b7a80;
|
||||
Node0x7fffdb5b79e0 [shape=record,label="{if.then}"];
|
||||
Node0x7fffdb5b79e0 -> Node0x7fffdb5b7a80;
|
||||
Node0x7fffdb5b7a80 [shape=record,label="{for.inc}"];
|
||||
Node0x7fffdb5b7a80 -> Node0x7fffdb5b79c0[constraint=false];
|
||||
Node0x7fffdb5b7a80 -> Node0x7fffdb5b7ae0;
|
||||
Node0x7fffdb5b7ae0 [shape=record,label="{for.end}"];
|
||||
Node0x7fffdb5b7ae0 -> Node0x7fffdb5b7960[constraint=false];
|
||||
Node0x7fffdb5b7ae0 -> Node0x7fffdb5b7b40;
|
||||
Node0x7fffdb5b7b40 [shape=record,label="{for.end12}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x5abb9a0 {
|
||||
subgraph cluster_0x7fffdb5b8530 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x5ae32c0 {
|
||||
subgraph cluster_0x7fffdb5cc3c0 {
|
||||
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
|
||||
style = solid;
|
||||
color = 6
|
||||
subgraph cluster_0x5ae3240 {
|
||||
subgraph cluster_0x7fffdb5b86a0 {
|
||||
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x5abba20 {
|
||||
subgraph cluster_0x7fffdb5b8f40 {
|
||||
label = "Region can not profitably be optimized!";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x5ae7f20;
|
||||
Node0x5ae7f40;
|
||||
Node0x7fffdb5b79c0;
|
||||
Node0x7fffdb5b79e0;
|
||||
}
|
||||
Node0x5ae7f60;
|
||||
Node0x7fffdb5b7a80;
|
||||
}
|
||||
Node0x5ae7d90;
|
||||
Node0x5ae7e30;
|
||||
Node0x7fffdb5b7960;
|
||||
Node0x7fffdb5b7ae0;
|
||||
}
|
||||
Node0x5ae5e30;
|
||||
Node0x5ae5f50;
|
||||
Node0x5ae8110;
|
||||
Node0x7fffdb5c9180;
|
||||
Node0x7fffdb5b7940;
|
||||
Node0x7fffdb5b7b40;
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 51 KiB After Width: | Height: | Size: 76 KiB |