Close plots when written

jdavis3141 · jdavis3141 · commit a72021a4f0c1 · 2024-04-25T13:52:16.000-04:00
Update descriptions with floor range
Resize pass rates by pool
diff --git a/app.py b/app.py
@@ -527,7 +527,7 @@ def read_json_from_s3(bucket_name, filename, prefix):
         with st.spinner('Generating report and uploading results'):
 
             # Read build and create metric dfs
-            df_build = read_build.read_build_from_s3(build)
+            df_build = read_build.read_build_from_s3(build, data_levels=['qc', 'mfi', 'lfc', 'inst', 'cell'])
             qc = df_build.qc
             mfi = df_build.mfi
             lfc = df_build.lfc
diff --git a/descriptions.py b/descriptions.py
@@ -27,11 +27,13 @@
                 'quantile of all cell line barcodes that the given median control barcode falls into. Note that this ' \
                 'is limited to vehicle wells. The ideal distribution is signified by the dashed line on the plot.'
 
-dr_and_er = 'Dynamic range and error rate are the two key QC metrics that we use when determining whether or not a ' \
+dr_and_er = 'Dynamic range, error rate and floor range are the three key QC metrics that we use when determining whether or not a ' \
             'cell line passes or fails on a given detection plate. **Dynamic range** is given by the difference ' \
             'between the logMFI of the vehicle control and the positive control. The **error rate** is a measure of the ' \
             'overlap between the positive and negative control values for each cell line and is given as **ER = (FP - FN)/n** ' \
-            'where FP is the false positive rate, FN is the false negative rate, and n is the total number of controls.'
+            'where FP is the false positive rate, FN is the false negative rate, and n is the total number of controls. ' \
+            '**Floor range** is defined as the difference between the signal from a cell line in vehicle and a bead that ' \
+            'has been coupled to a sequence that lacks any matching sequence.'
 
 pass_by_plate = 'Fractions of cell lines within each detection plate that pass our thresholds for both dynamic range ' \
                 'and error rate.'
diff --git a/plotting_functions.py b/plotting_functions.py
@@ -38,6 +38,9 @@ def plot_dynamic_range(df, metric, build, filename, bucket_name='cup.clue.io'):
     fig_json = g.to_json()
     s3.put_object(Bucket=bucket_name, Key=f"{build}/{filename}", Body=fig_json.encode('utf-8'))
 
+    # Close the plot
+    plt.close('all')
+
 
 def plot_dynamic_range_norm_raw(df, build, filename, bucket_name='cup.clue.io'):
     g = px.scatter(data_frame=df,
@@ -65,6 +68,9 @@ def plot_dynamic_range_norm_raw(df, build, filename, bucket_name='cup.clue.io'):
     fig_json = g.to_json()
     s3.put_object(Bucket=bucket_name, Key=f"{build}/{filename}", Body=fig_json.encode('utf-8'))
 
+    # Close the plot
+    plt.close('all')
+
 
 # PASS RATES
 def plot_pass_rates_by_plate(df, build, filename, bucket_name='cup.clue.io'):
@@ -83,6 +89,9 @@ def plot_pass_rates_by_plate(df, build, filename, bucket_name='cup.clue.io'):
     json = g.to_json()
     s3.put_object(Bucket=bucket_name, Key=f"{build}/{filename}", Body=json.encode('utf-8'))
 
+    # Close the plot
+    plt.close('all')
+
 
 def plot_pass_rates_by_pool(df, culture, build):
     df['replicate'] = df['prism_replicate'].str.split('_').str[3]
@@ -105,7 +114,7 @@ def plot_pass_rates_by_pool(df, culture, build):
             stat_count() +
             facet_grid('rep_number ~ pert_plate') +
             theme(axis_text_x=element_text(rotation=90)) +
-            theme(figure_size=(10, 6)) +
+            theme(figure_size=(10, 4)) +
             xlab('') +
             ylab('') +
             scale_fill_manual(values=colors) +
@@ -122,6 +131,9 @@ def plot_pass_rates_by_pool(df, culture, build):
     object_key = f"{build}/{culture}_pass_by_pool.png"
     s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+    # Close the plot
+    plt.close('all')
+
 
 # DISTRIBUTIONS
 
@@ -156,6 +168,9 @@ def plot_distributions_by_plate(df, build, filename, culture, pert_types=['trt_p
     full_filename = f"{culture}_{filename}"
     s3.upload_fileobj(buffer, bucket_name, f"{build}/{full_filename}")
 
+    # Close the plot
+    plt.close('all')
+
 
 # BANANA PLOTS
 
@@ -193,6 +208,9 @@ def plot_banana_plots(df, x, y, filename, build, bucket_name='cup.clue.io'):
     fig_json = g.to_json()
     s3.put_object(Bucket=bucket_name, Key=f"{build}/{filename}", Body=fig_json.encode('utf-8'))
 
+    # Close the plot
+    plt.close('all')
+
 
 # LIVER PLOTS
 
@@ -221,6 +239,9 @@ def plot_liver_plots(df, build, filename, bucket_name='cup.clue.io'):
     fig_json = g.to_json()
     s3.put_object(Bucket=bucket_name, Key=f"{build}/{filename}", Body=fig_json.encode('utf-8'))
 
+    # Close the plot
+    plt.close('all')
+
 
 # ERROR RATE V SSMD
 
@@ -248,6 +269,9 @@ def plot_dr_error_rate(df, build, filename, bucket_name='cup.clue.io'):
     fig_json = g.to_json()
     s3.put_object(Bucket=bucket_name, Key=f"{build}/{filename}", Body=fig_json.encode('utf-8'))
 
+    # Close the plot
+    plt.close('all')
+
 
 # REPLICATE CORRELATION
 
@@ -263,6 +287,9 @@ def corrdot(*args, **kwargs):
     ax.annotate(corr_text, [.5, .5, ], xycoords="axes fraction",
                 ha='center', va='center', fontsize=font_size)
 
+    # Close the plot
+    plt.close('all')
+
 
 def make_corrplots(df, pert_plate, build, culture, metric='logMFI_norm', bucket_name='cup.clue.io'):
     data = df[(df.pert_plate == pert_plate) & (df.culture == culture)]
@@ -291,6 +318,9 @@ def make_corrplots(df, pert_plate, build, culture, metric='logMFI_norm', bucket_
     filename = f"{pert_plate}:{culture}_corrplot.png"
     s3.upload_fileobj(buffer, bucket_name, f"{build}/{filename}")
 
+    # Close plot
+    plt.close('all')
+
 
 def plot_plate_heatmaps(df, metric, build, culture, vmax=4, vmin=16, by_type=True):
     metric = metric
@@ -370,6 +400,10 @@ def plot_plate_heatmaps(df, metric, build, culture, vmax=4, vmin=16, by_type=Tru
         s3 = boto3.client('s3')
         s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+        # Close plot
+        plt.close('all')
+
+
 
 def make_pert_type_heatmaps(df, build, vmax, vmin, metric='logMFI'):
     for culture in df.culture.unique():
@@ -441,6 +475,9 @@ def make_pert_type_heatmaps(df, build, vmax, vmin, metric='logMFI'):
         s3 = boto3.client('s3')
         s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+        # Close plot
+        plt.close('all')
+
 
 def make_build_count_heatmaps(df, build, metric='count'):
     for culture in df.culture.unique():
@@ -504,6 +541,9 @@ def make_build_count_heatmaps(df, build, metric='count'):
         s3 = boto3.client('s3')
         s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+        # Close plot
+        plt.close('all')
+
 
 def generate_cbc_quantile_plot(df, build, culture):
     # Filter and get unique values
@@ -565,6 +605,9 @@ def generate_cbc_quantile_plot(df, build, culture):
     s3 = boto3.client('s3')
     s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+    # Close plot
+    plt.close('all')
+
 
 def make_build_mfi_heatmaps(df, build, vmax, vmin, metric='logMFI'):
     for culture in df.culture.unique():
@@ -628,6 +671,9 @@ def make_build_mfi_heatmaps(df, build, vmax, vmin, metric='logMFI'):
         s3 = boto3.client('s3')
         s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+        # Close plot
+        plt.close('all')
+
 
 def make_control_violin_plot(df, build, culture):
     # Subset data
@@ -659,6 +705,9 @@ def make_control_violin_plot(df, build, culture):
     g.save(img_data, format='png', width=fig_width, height=fig_height, dpi=100)
     img_data.seek(0)
 
+    # Close the plot
+    plt.close('all')
+
     # Upload to S3
     s3 = boto3.client('s3')
     object_key = f"{build}/{culture}_ctl_violin.png"
@@ -710,6 +759,9 @@ def make_ctlbc_rank_heatmaps(df, build, culture):
     object_key = f"{build}/{culture}_ctlbc_rank_heatmap.png"
     s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+    # Close the plot
+    plt.close('all')
+
 
 def make_ctlbc_rank_violin(df, build, culture, corrs):
     # Subset data and add row/col
@@ -751,6 +803,9 @@ def make_ctlbc_rank_violin(df, build, culture, corrs):
     object_key = f"{build}/{culture}_ctlbc_rank_violin.png"
     s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+    # Close the plot
+    plt.close('all')
+
 
 def make_control_norm_plots(mfi, qc, culture, build):
     df = mfi.merge(qc[['prism_replicate', 'ccle_name', 'pass']], on=['prism_replicate', 'ccle_name'], how='left')
@@ -795,6 +850,9 @@ def make_control_norm_plots(mfi, qc, culture, build):
         object_key = f"{build}/{culture}_{pert}_norm.png"
         s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
 
+        # Close the plot
+        plt.close('all')
+
 
 def heatmap_plate(df, metric, build, culture, facet_method=None, facets=None, limits=None,
                   fig_size=(8, 3), title='', text_size=5, annotation='pert_type_annotation', tick_size=5,
@@ -922,3 +980,6 @@ def heatmap_plate(df, metric, build, culture, facet_method=None, facets=None, li
     s3 = boto3.client('s3')
     object_key = f"{build}/{metric}_{culture}_heatmaps.png"
     s3.upload_fileobj(img_data, 'cup.clue.io', object_key)
+
+    # Close the plot
+    plt.close('all')