Skip to content

Commit 5ac3628

Browse files
committed
Data migration & bug fix
1 parent ae96d7d commit 5ac3628

File tree

4 files changed

+63
-17
lines changed

4 files changed

+63
-17
lines changed
Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('SUBJ', 'Subject Level Data', false);
2-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('PREGHX', 'Pregnancy History', false);
3-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('RANTHRO', 'Relatives Anthropometry', false);
4-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('PARENTS', 'Parents', false);
5-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('QS', 'Questionnaire Results', false);
6-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('MB', 'Microbiology', false);
7-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('ANTHRO', 'Anthropometry', false);
8-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('*SPEC', 'Specimen', true);
9-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('AGECAT', 'Age Category', true);
10-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('RELTIVE', 'Relative', true);
11-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('VS', 'Vital Signs', false);
12-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('NT', 'Nutrition', false);
13-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('LB', 'Laboratory', false);
14-
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('SS', 'Socioeconomic Status', false);
1+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('SUBJ', 'Subject Level Data', false) ON CONFLICT DO NOTHING;
2+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('PREGHX', 'Pregnancy History', false) ON CONFLICT DO NOTHING;
3+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('RANTHRO', 'Relatives Anthropometry', false) ON CONFLICT DO NOTHING;
4+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('PARENTS', 'Parents', false) ON CONFLICT DO NOTHING;
5+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('QS', 'Questionnaire Results', false) ON CONFLICT DO NOTHING;
6+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('MB', 'Microbiology', false) ON CONFLICT DO NOTHING;
7+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('ANTHRO', 'Anthropometry', false) ON CONFLICT DO NOTHING;
8+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('*SPEC', 'Specimen', true) ON CONFLICT DO NOTHING;
9+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('AGECAT', 'Age Category', true) ON CONFLICT DO NOTHING;
10+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('RELTIVE', 'Relative', true) ON CONFLICT DO NOTHING;
11+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('VS', 'Vital Signs', false) ON CONFLICT DO NOTHING;
12+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('NT', 'Nutrition', false) ON CONFLICT DO NOTHING;
13+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('LB', 'Laboratory', false) ON CONFLICT DO NOTHING;
14+
INSERT INTO studies_domain (code, label, is_qualifier) VALUES ('SS', 'Socioeconomic Status', false) ON CONFLICT DO NOTHING;

hbgd_data_store_server/studies/management/commands/load_idx.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def get_domain_variable(row, domain, variable_cache=None):
6161
code_idx = DOMAIN_CODE_FORMAT.format(domain=domain.code)
6262
cat_idx = DOMAIN_CAT_FORMAT.format(domain=domain.code)
6363

64-
code = row[code_idx]
64+
code = row.get(code_idx, None)
6565
if code in EMPTY_IDENTIFIERS:
6666
return None
6767

@@ -126,8 +126,11 @@ def get_valid_qualifiers(columns):
126126
raise Exception('Qualifier code must match only one column per file.')
127127
qual_code = cols[0]
128128
suffix_re = qual_code + r'(\w{1,})'
129-
suffix = [re.match(suffix_re, col).group(1) for col in columns
130-
if re.match(suffix_re, col)][0]
129+
potential_suffixes = [re.match(suffix_re, col).group(1) for col in columns
130+
if re.match(suffix_re, col)]
131+
suffix = ''
132+
if len(potential_suffixes) > 0:
133+
suffix = potential_suffixes[0]
131134
valid_qualifiers.append((qual, qual_code, suffix))
132135
return valid_qualifiers
133136

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# -*- coding: utf-8 -*-
2+
# Generated by Django 1.10.5 on 2020-04-20 04:35
3+
from __future__ import unicode_literals
4+
5+
from django.db import migrations, transaction
6+
import os
7+
8+
# Raw SQL handed to this migration's RunSQL operation.  The path is resolved
# relative to this module so it does not depend on the working directory.
studyfield_sql_path = os.path.join(
    os.path.dirname(__file__),
    '../../data/sql/002_update_studies_studyfield.sql',
)

if not os.path.exists(studyfield_sql_path):
    # SQL file not present: fall back to a statement that changes nothing.
    studyfield_sql = 'SELECT 0;'
else:
    with open(studyfield_sql_path, 'r') as f:
        studyfield_sql = f.read()
14+
15+
16+
def fix_study_variable_list(apps, schema_editor):
    """Split comma-separated 'list' StudyVariable values into one row per item.

    For every StudyVariable whose study field has ``field_type='list'`` and
    whose value contains a comma, a StudyVariable is fetched or created for
    each comma-separated item (with all spaces removed), the studies linked
    to the combined row are attached to it, and the combined row is deleted.

    The logic mirrors ``StudyVariable.split_list``.  It is inlined here
    because the historical model returned by ``apps.get_model`` does not
    expose custom model methods.

    Args:
        apps: Historical app registry supplied by ``migrations.RunPython``.
        schema_editor: Schema editor supplied by ``migrations.RunPython``
            (unused).
    """
    StudyVariable = apps.get_model('studies', 'StudyVariable')
    objs_to_split = StudyVariable.objects.filter(study_field__field_type='list', value__contains=',')

    sep = ','
    with transaction.atomic():
        for obj in objs_to_split:
            if sep not in obj.value:
                # Nothing to split in this row.  This must be `continue`:
                # a `return` here would silently abandon every remaining row.
                continue

            # Fetch the linked studies once; every item gets the same set.
            linked_studies = list(obj.studies.all())
            for val in obj.value.split(sep):
                v = val.replace(' ', '')
                study_var, _ = StudyVariable.objects.get_or_create(study_field=obj.study_field, value=str(v))
                study_var.studies.add(*linked_studies)

            # Remove the combined row only after all of its items exist.
            obj.delete()
31+
32+
33+
class Migration(migrations.Migration):
    """Run the study-field SQL, then split list-valued study variables."""

    dependencies = [('studies', '0014_auto_20170214_0852')]

    operations = [
        # Step 1: raw SQL loaded at import time (or its 'SELECT 0;' fallback).
        migrations.RunSQL(studyfield_sql),
        # Step 2: Python data fix for comma-separated 'list' values.
        migrations.RunPython(fix_study_variable_list),
    ]

hbgd_data_store_server/studies/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ def split_list(self, obj, sep=','):
223223
study_var, _ = self.objects.get_or_create(study_field=obj.study_field,
224224
value=str(v))
225225
study_var.studies.add(*[s for s in obj.studies.all()])
226+
obj.delete()
226227

227228
@classmethod
228229
def get_dataframe(self, **kwargs):

0 commit comments

Comments
 (0)