Fix race condition in CrashGroup::find_or_create by using constraint and upsert (for pg > 9.5) or table lock
authorVincent Tondellier <tonton+hg@team1664.org>
Sun, 14 Feb 2016 20:24:11 +0100
changeset 97 f68abe1d7358
parent 96 716c0292967c
child 98 aa2437932c03
Fix race condition in CrashGroup::find_or_create by using constraint and upsert (for pg > 9.5) or table lock
lib/CrashTest/Plugin/Storage/Sql/Model/CrashGroup.pm
lib/CrashTest/Plugin/Storage/Sql/migrations_pg.sql
--- a/lib/CrashTest/Plugin/Storage/Sql/Model/CrashGroup.pm	Sun Feb 14 20:18:39 2016 +0100
+++ b/lib/CrashTest/Plugin/Storage/Sql/Model/CrashGroup.pm	Sun Feb 14 20:24:11 2016 +0100
@@ -131,36 +131,101 @@
     return $results;
 }
 
+has signature_extractor => sub {
+    my $self = shift;
+    state $sig_extract = CrashTest::Plugin::CrashSignatureExtractor::C_Cpp->new(
+        app => $self->app,
+        config => $self->app->config->{Processor}->{CrashSignatureExtractor}->{C_Cpp},
+    );
+};
+
+sub _find_by_sig {
+    my ($self, $sig) = @_;
+
+    return $self->db->query(
+        "SELECT id, crash_thread_signature_bt <-> \$1 AS dist
+        FROM crash_groups
+        WHERE crash_thread_signature_bt % \$1
+        ORDER BY dist ASC
+        LIMIT 1",
+        $sig
+    )->hash;
+}
+
+sub _create_pg95 {
+    my ($self, $uuid, $sig, $title) = @_;
+
+    my $crash_group = $self->db->query(
+        "INSERT INTO crash_groups (uuid, crash_thread_signature_bt, title)
+        VALUES (?, ?, ?)
+        ON CONFLICT DO NOTHING
+        RETURNING id",
+        $uuid, $sig, $title
+    )->hash;
+
+    if(!defined($crash_group)) {
+        $crash_group = $self->_find_by_sig($sig);
+        warn "crash group is null after insert, reselected " . $crash_group->{id} . "\n";
+    }
+
+    return $crash_group;
+}
+
+sub _create_pg94 {
+    my ($self, $uuid, $sig, $title) = @_;
+
+    my $tx = $self->db->begin;
+
+    # avoid race condition, no other safe way without upsert
+    $self->db->query("LOCK TABLE crash_groups IN EXCLUSIVE MODE")->finish;
+
+    # this query is NOT concurrency safe, there is a possible write race. That's why we lock the table.
+    my $crash_group = $self->db->query(
+        "INSERT INTO crash_groups (uuid, crash_thread_signature_bt, title)
+        SELECT \$1, \$2, \$3
+        WHERE NOT EXISTS (SELECT 1 FROM crash_groups WHERE crash_thread_signature_bt % \$2)
+        RETURNING id",
+        $uuid, $sig, $title
+    )->hash;
+
+    # may happen if there is an insert between the select (in _find_by_sig) and the lock
+    if(!defined($crash_group)) {
+        $crash_group = $self->_find_by_sig($sig);
+        warn "crash group is null after insert, reselected " . $crash_group->{id} . "\n";
+    }
+
+    $tx->commit;
+
+    return $crash_group;
+}
+
+sub _set_similarity_limit {
+    my $self = shift;
+    my $max_dist = $self->app->config->{Processor}->{CrashSignatureExtractor}->{C_Cpp}->{GroupMaxDistance} || 0.1;
+    $self->db->query("SELECT set_limit(?)", 1.0 - $max_dist)->finish;
+}
 
 sub find_or_create {
     my ($self, $uuid, $pjson) = @_;
 
-    my $max_dist = $self->app->config->{Processor}->{CrashSignatureExtractor}->{C_Cpp}->{GroupMaxDistance} || 0.1;
-
-    my $sig_extract = CrashTest::Plugin::CrashSignatureExtractor::C_Cpp->new(
-        app => $self->app,
-        config => $self->app->config->{Processor}->{CrashSignatureExtractor}->{C_Cpp}
-    );
-
     my $raw_thread = dclone $pjson->{threads}->[$pjson->{crashing_thread}->{threads_index}];
     my $thread = CrashTest::Model::Thread->new($raw_thread);
 
-    my $sig = join "\n", @{$sig_extract->extract_signature($thread)};
+    my $sig = join "\n", @{$self->signature_extractor->extract_signature($thread)};
+
+    $self->_set_similarity_limit();
+    my $crash_group = $self->_find_by_sig($sig);
 
-    my $crash_group;
-    if($sig ne "") {
-        $crash_group = $self->db->query(
-            "SELECT id, crash_thread_signature_bt <-> ? AS dist FROM crash_groups ORDER BY dist LIMIT 1",
-            $sig
-        )->hash;
+    if(!$crash_group) {
+        if($sig ne "") {
+            my $title = $self->signature_extractor->extract_signature_title($thread);
 
-        # race condition here ...
-
-        if(!$crash_group || $crash_group->{dist} > $max_dist) {
-            $crash_group = $self->db->query(
-                "INSERT INTO crash_groups (uuid, crash_thread_signature_bt, title) VALUES (?, ?, ?) RETURNING id",
-                $uuid, $sig, $sig_extract->extract_signature_title($thread)
-            )->hash;
+            if($self->db->dbh->{pg_server_version} >= 90500) {
+                #warn "PostgreSQL 9.5, nice !";
+                $crash_group = $self->_create_pg95($uuid, $sig, $title);
+            } else {
+                $crash_group = $self->_create_pg94($uuid, $sig, $title);
+            }
         }
     }
     return $crash_group;
--- a/lib/CrashTest/Plugin/Storage/Sql/migrations_pg.sql	Sun Feb 14 20:18:39 2016 +0100
+++ b/lib/CrashTest/Plugin/Storage/Sql/migrations_pg.sql	Sun Feb 14 20:24:11 2016 +0100
@@ -79,20 +79,19 @@
 -- 2 up
 -- ###########################################################################
 
+DROP INDEX crash_report_datas_idx_extract_crashing_functions;
+SELECT set_limit(0.1);
 CREATE TABLE "crash_groups" (
     "id" serial NOT NULL,
     "uuid" uuid NOT NULL,
     "title" character varying NOT NULL,
     "crash_thread_signature_bt" text NOT NULL,
+
+    EXCLUDE USING gist (crash_thread_signature_bt gist_trgm_ops WITH %),
     CONSTRAINT "crash_groups_uuid_idx" UNIQUE ("uuid"),
     PRIMARY KEY ("id")
 );
 
-DROP INDEX crash_report_datas_idx_extract_crashing_functions;
-CREATE INDEX crash_groups_idx_crash_thread_signature_bt ON crash_groups USING gist (
-    crash_thread_signature_bt gist_trgm_ops
-);
-
 ALTER TABLE "crash_reports" ADD COLUMN crash_group_id integer;
 ALTER TABLE "crash_reports" ADD COLUMN crash_group_distance real;
 ALTER TABLE "crash_reports" ADD CONSTRAINT "crash_reports_fk_crash_group_id" FOREIGN KEY ("crash_group_id")