-- Sequence table: one row per checksum (the primary key).
CREATE TABLE seq (
    -- Key column; presumably a digest of the sequence text -- TODO confirm
    -- which checksum algorithm is used.
    checksum TEXT,

    -- INSDC sequence accession, when available.
    ac TEXT,

    -- Could equally be declared TEXT: the original author saw no need to
    -- support "less than" comparisons on this column.
    len INTEGER,

    -- NOTE(review): presumably the sequence string itself -- confirm.
    seq TEXT,

    -- Open question carried over from the original author: what about
    -- checksum collisions?
    PRIMARY KEY (checksum)
);
-- Secondary lookup indexes on seq. Each statement is terminated with ';'
-- so that it parses as its own statement rather than running into the next.
CREATE INDEX seq_len ON seq (len);

-- Deliberately non-unique: different checksums may have the same AC.
CREATE INDEX seq_ac ON seq (ac);
-- Sequence set: one row per (ss_id, alias) pair, each carrying a checksum.
CREATE TABLE seq_set (
    -- Seq-set identifier.
    ss_id TEXT,

    -- Sequence name alias (original note: "in ss_name").
    alias TEXT,

    -- NOTE(review): presumably refers to seq.checksum; no foreign key is
    -- declared here -- confirm.
    checksum TEXT,

    -- This tuple is required to be unique.
    PRIMARY KEY (ss_id, alias)
);
-- A given (checksum, ss_id) pair may appear at most once in seq_set.
-- Terminated with ';' so the statement does not run into the one after it.
CREATE UNIQUE INDEX ss_checksum ON seq_set (checksum, ss_id);
-- Additional properties about a sequence set; one row per ss_id.
CREATE TABLE ss_properties (
    -- NOTE(review): presumably matches seq_set.ss_id; no foreign key is
    -- declared here -- confirm.
    ss_id TEXT,

    -- INSDC Genome or Assembly accession, if available.
    ac TEXT,

    -- Whether the set contains the primary assembly only.
    primary_only BOOLEAN,

    -- Whether the set is a GRC-blessed "analysis set".
    analysis_set BOOLEAN,

    PRIMARY KEY (ss_id)
);
-- Index for looking up sequence-set properties by accession.
-- ss_id is already the primary key of ss_properties, so every (ac, ss_id)
-- pair is unique anyway; UNIQUE adds no further restriction here.
-- Terminated with ';' so that any statement appended later parses correctly.
CREATE UNIQUE INDEX ss_ac ON ss_properties (ac, ss_id);