[From nobody Wed Nov 28 10:56:12 2018 Bcc: rwestrel@redhat.com Return-Path: <rwestrel@redhat.com> Received: from localhost (lfbn-1-7974-128.w90-112.abo.wanadoo.fr. [90.112.131.128]) by smtp.gmail.com with ESMTPSA id 80sm3619407wmv.6.2018.11.28.02.53.10 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Wed, 28 Nov 2018 02:53:10 -0800 (PST) From: Roland Westrelin <rwestrel@redhat.com> To: Stuart Monteith <stuart.monteith@linaro.org> Subject: Re: [aarch64-port-dev ] Aarch64 port for ZGC, so far In-Reply-To: <CAEGA6kZGVS-nPOyYrXKM6zQJyShZUGNNcDL5is3feTPfmhg-fA@mail.gmail.com> References: <CAEGA6kawjHNPSsd=5JqKW-mF23ft+1ubEVjoGekXCLYYC4ozxA@mail.gmail.com> <780e5bca-b585-fb4a-cc58-1fa2b0eeb5c2@redhat.com> <87r2f6nbom.fsf@redhat.com> <CAEGA6kZGVS-nPOyYrXKM6zQJyShZUGNNcDL5is3feTPfmhg-fA@mail.gmail.com> Date: Wed, 28 Nov 2018 11:53:09 +0100 Message-ID: <87a7ltmq22.fsf@redhat.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="====-=-=" --====-=-= Content-Type: text/plain Content-Disposition: inline Hi Stuart, Your test case runs AFAICT with the attached patch. It would be nice to extend the tests in test/hotspot/jtreg/compiler/c2/aarch64/ so they verify the code produced when ZGC is enabled. To locate the trailing membar from the second cas, the patch adds a precedence edge from the cas to the first one which itself has an edge to the trailing membar. 
The crash in the matcher is caused by the following zgc code: bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const { if (opcode == Op_CallLeaf && (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() || n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr())) { mem_op = true; mem_addr_idx = TypeFunc::Parms + 1; return true; } return false; } What this does, I think, is make sure that passing a field/array element address to the barrier runtime call doesn't prevent the address computation from being embedded in memory access instructions that use that address. That breaks on aarch64 in that case because other instructions using that address are atomic operations and they only support indirect memory addressing. The change in the patch that addresses this excludes atomic operations from address cloning. Roland. --====-=-= Content-Type: text/x-patch Content-Disposition: inline; filename=aarch64-zgc.patch changeset: 52807:e7049428aa7e tag: tip user: roland date: Tue Nov 27 17:58:08 2018 +0100 summary: zgc aarch64 fixes diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2308,7 +2308,10 @@ // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? 
-bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { +bool Matcher::clone_address_expressions(Node* n, AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + if (n->is_LoadStore()) { + return false; + } if (clone_base_plus_offset_address(m, mstack, address_visited)) { return true; } diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -1080,7 +1080,7 @@ // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? -bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { +bool Matcher::clone_address_expressions(Node* n, AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { return clone_base_plus_offset_address(m, mstack, address_visited); } diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -983,7 +983,7 @@ // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? -bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { +bool Matcher::clone_address_expressions(Node* n, AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { return clone_base_plus_offset_address(m, mstack, address_visited); } diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1760,7 +1760,7 @@ // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? 
-bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { +bool Matcher::clone_address_expressions(Node* n, AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { return clone_base_plus_offset_address(m, mstack, address_visited); } diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad --- a/src/hotspot/cpu/sparc/sparc.ad +++ b/src/hotspot/cpu/sparc/sparc.ad @@ -1944,7 +1944,7 @@ // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? -bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { +bool Matcher::clone_address_expressions(Node* n, AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { return clone_base_plus_offset_address(m, mstack, address_visited); } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1623,7 +1623,7 @@ // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? 
-bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { +bool Matcher::clone_address_expressions(Node* n, AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { Node *off = m->in(AddPNode::Offset); if (off->is_Con()) { address_visited.test_set(m->_idx); // Flag as address_visited diff --git a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp --- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp @@ -534,6 +534,7 @@ // redo CAS Node* cas2 = gvn.transform(new CompareAndSwapPNode(elsen2, kit->memory(alias_idx), in_adr, in_val, in_expected, cas->order())); + cas2->add_prec(cas); Node* scmemproj2 = gvn.transform(new SCMemProjNode(cas2)); kit->set_control(elsen2); kit->set_memory(scmemproj2, alias_idx); @@ -620,6 +621,7 @@ // Redo CAS Node* cmpx2 = gvn.transform(new CompareAndExchangePNode(elsen2, kit->memory(alias_idx), in_adr, in_val, in_expected, adr_type, cmpx->get_ptr_type(), cmpx->order())); + cmpx2->add_prec(cmpx); Node* scmemproj2 = gvn.transform(new SCMemProjNode(cmpx2)); kit->set_control(elsen2); kit->set_memory(scmemproj2, alias_idx); diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -2118,7 +2118,7 @@ // But they should be marked as shared if there are other uses // besides address expressions. 
- if (clone_address_expressions(m->as_AddP(), mstack, address_visited)) { + if (clone_address_expressions(n, m->as_AddP(), mstack, address_visited)) { continue; } } // if( mem_op && diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -453,7 +453,7 @@ // Should the Matcher clone shifts on addressing modes, expecting them to // be subsumed into complex addressing expressions or compute them into // registers? True for Intel but false for most RISCs - bool clone_address_expressions(AddPNode* m, MStack& mstack, VectorSet& address_visited); + bool clone_address_expressions(Node* n, AddPNode* m, MStack& mstack, VectorSet& address_visited); // Clone base + offset address expression bool clone_base_plus_offset_address(AddPNode* m, MStack& mstack, VectorSet& address_visited); diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2803,6 +2803,17 @@ } MemBarNode* LoadStoreNode::trailing_membar() const { + if (UseZGC) { + for (uint i = req(); i < len(); i++) { + Node* prec = in(i); + if (prec == NULL) { + break; + } + if (prec->Opcode() == Opcode()) { + return prec->as_LoadStore()->trailing_membar(); + } + } + } MemBarNode* trailing = NULL; for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { Node* u = fast_out(i); --====-=-=-- ]