ConsDB  1.0
Tool for creating consensus genomes from variant databases.
Public Member Functions | Static Public Member Functions | Public Attributes | List of all members
RSEntry.RSCollection Class Reference

Public Member Functions

def __init__ (self)
 
def __add__ (self, rsc)
 
def __eq__ (self, rsc)
 
def __getitem__ (self, key)
 
def __iadd__ (self, rsc)
 
def __len__ (self)
 
def __repr__ (self)
 
def __str__ (self)
 
def add_entry (self, e)
 
def add_entry_from_args (self, chrom, rsid, pos, quiet=False)
 
def dump (self, fn, idx_file, c, rsids=['all'], old_size=0, append=False, chunksize=10000)
 
def dump_full (self, fn)
 
def dump_chrs (self, chrs, fp_out, store_all=False, store_maj=False)
 
def dump_vcf (self, fn, pop=None, cons=False, is_maj=False)
 
def get_by_chr (self, chrom)
 
def get_by_chr_pos (self, chrom, pos)
 
def get_by_rsid (self, rsid)
 
def get_major (self, mut=True)
 

Static Public Member Functions

def chrom_to_int (c)
 
def from_1000gp (fn, index_fn, superpop_fn, quiet=False)
 
def from_gnomad (fn, quiet=False)
 
def from_dbsnp (fn, quiet=False)
 
def get_chrom_from_filename (fn)
 
def load_from_file_by_chr_pos (fn, idx_file, c, pos, chunk_idx_dict=None, ret_chunk=False)
 
def load_from_file_by_rsid (fn, idx_file, rsid, chunk_idx_dict=None, ret_chunk=False)
 
def load_from_file_full (fn)
 
def load_from_file_pops (fn, pops, cons=False)
 
def make_chunk_idx_dict (idx_fn, key_fields=[1, 2])
 
def merge_files (fn_list, out_fn, c=None)
 
def open (fn)
 
def parse_file_line (d)
 
def sort_rsidx (rsidx)
 
def sort_rsidx_line (line_split)
 

Public Attributes

 entries
 
 chr_pos_table
 
 rsid_table
 

Detailed Description

    A class to represent a collection of entries.

    Attributes
    --------------------
    entries : dict
        Dictionary that maps from (rsid, chrom, pos) to RSEntry object
    chr_pos_table : dict
        Dictionary that maps from (chrom, pos) to (rsid, chrom, pos)
    rsid_table : dict
        Dictionary that maps from rsid to (rsid, chrom, pos)

    Methods
    --------------------
    add_entry(e)
        Add entry from an RSEntry object
    add_entry_from_args(chrom, rsid, pos, quiet=False)
        Add an entry based on the given arguments
    dump(fn, idx_file, c, rsids = ['all'], old_size=0, append=False,
        chunksize=10000)
        Save entries for the given rsids to the given file
    dump_full(fn)
        Quickly save the whole RSCollection object to the given file
    dump_chrs(chrs, fp_out, store_all=False, store_maj=False)
        Save entries for the given chromosomes
    dump_vcf(fn, pop=None, cons=False, is_maj=False)
        Save entries as a VCF file
    get_by_chr(chrom)
        Return a RSCollection object containing all entries from given chrom
    get_by_chr_pos(chrom, pos)
        Return a list of all entries with the given chromosome and
        position
    get_by_rsid(rsid)
        Return a list of RSEntry objects that match the given rsid
    get_major(mut=True)
        Return a RSCollection object containing all major alleles

    Static Methods
    --------------------
    chrom_to_int(c)
        Function to use to sort chromosomes (numeric < X < Y < M)
    from_1000gp(fn, index_fn, superpop_fn, quiet=False)
        Create a new RSCollection object from a 1000 Genomes Project VCF file
    from_gnomad(fn, quiet=False)
        Create a new RSCollection object from a gnomAD VCF file
    from_dbsnp(fn, quiet=False)
        Create a new RSCollection object from a dbSNP JSON file
    get_chrom_from_filename(fn)
        Helper method to parse a filename and find which chromosome it is
    load_from_file_by_chr_pos(fn, idx_file, c, pos, chunk_idx_dict=None,
        ret_chunk=False)
        Find an entry in the given file using given chromosome and position
    load_from_file_by_rsid(fn, idx_file, rsid, chunk_idx_dict=None,
        ret_chunk=False)
        Find an entry in the given file using given rsid
    load_from_file_full(fn)
        Load an entire ConsDB file
    load_from_file_pops(fn, pops, cons=False)
        Load only variants that are major variants for the given pops
    make_chunk_idx_dict(idx_fn, key_fields=[1,2])
        Make a dict representation of the idx file
    merge_files(fn_list, out_fn, c=None)
        Merge multiple ConsDB files into one
    open(fn)
        Helper method to appropriately open the given file
    parse_file_line(d)
        Create a RSEntry object from given line from ConsDB file
    sort_rsidx(rsidx)
        Helper method to sort a list of rsidx
    sort_rsidx_line(line_split)
        Helper method to sort lines from a ConsDB file

Member Function Documentation

◆ __add__()

def RSEntry.RSCollection.__add__ (   self,
  rsc 
)
Implement addition for two RSCollection objects.

◆ __eq__()

def RSEntry.RSCollection.__eq__ (   self,
  rsc 
)
Implement equality checking for two RSCollection objects.

◆ __getitem__()

def RSEntry.RSCollection.__getitem__ (   self,
  key 
)
Allow access to entries via the [] operator.

◆ __iadd__()

def RSEntry.RSCollection.__iadd__ (   self,
  rsc 
)
Implement incremental addition.

◆ __len__()

def RSEntry.RSCollection.__len__ (   self)
Implement len operator.

◆ __repr__()

def RSEntry.RSCollection.__repr__ (   self)
Implement repr operator.

◆ __str__()

def RSEntry.RSCollection.__str__ (   self)
Implement str operator.

◆ add_entry()

def RSEntry.RSCollection.add_entry (   self,
  e 
)
Add entry from an RSEntry object. Note that this does not create a new
RSEntry object.

Parameters:
e: RSEntry object

◆ add_entry_from_args()

def RSEntry.RSCollection.add_entry_from_args (   self,
  chrom,
  rsid,
  pos,
  quiet = False 
)
Add an entry based on the given arguments.

Parameters:
chrom: Chromosome of the entry being added
rsid: RefSNP ID of the entry being added
pos: Position of the entry being added
quiet: Suppress progress information being printed

◆ chrom_to_int()

def RSEntry.RSCollection.chrom_to_int (   c)
static
Function to use to sort chromosomes (numeric < X < Y < M)

Parameters:
c: Chromosome to convert

◆ dump()

def RSEntry.RSCollection.dump (   self,
  fn,
  idx_file,
  c,
  rsids = ['all'],
  old_size = 0,
  append = False,
  chunksize = 10000 
)
Save entries for the given rsids to the given file.

Split the entries to write into multiple chunks, then write each chunk
separately to the gzip file. This allows for random access when loading.

Parameters:
fn: Filename to save to
idx_file: Filename to save index information to
c: Which chromosome is being saved
rsids: Which RefSNP IDs/(chr, pos) to save
old_size: Prior size of the output file (used for random access)
append: Whether to append to the given output file
chunksize: Number of entries to write at a time

◆ dump_chrs()

def RSEntry.RSCollection.dump_chrs (   self,
  chrs,
  fp_out,
  store_all = False,
  store_maj = False 
)
Save entries for the given chromosomes.

Parameters:
chrs: Chromosomes to save
fp_out: Directory to store the files
store_all: Save file with all entries
store_maj: Save file with only major alleles

◆ dump_full()

def RSEntry.RSCollection.dump_full (   self,
  fn 
)
Quickly save the whole RSCollection object to the given file.

Parameters:
fn: Filename to save to

◆ dump_vcf()

def RSEntry.RSCollection.dump_vcf (   self,
  fn,
  pop = None,
  cons = False,
  is_maj = False 
)
Save entries as a VCF file.

Parameters:
fn: Filename to save to
pop: Population to use
cons: Whether a consensus VCF is being stored
is_maj: RSCollection already contains only major alleles

◆ from_1000gp()

def RSEntry.RSCollection.from_1000gp (   fn,
  index_fn,
  superpop_fn,
  quiet = False 
)
static
Create a new RSCollection object from a 1000 Genomes Project VCF file.

Parameters:
fn: Filename to load from
index_fn: Filename of the 1000 Genomes .index file to use
superpop_fn: Filename containing superpopulation information
quiet: Disable log/progress messages

◆ from_dbsnp()

def RSEntry.RSCollection.from_dbsnp (   fn,
  quiet = False 
)
static
Create a new RSCollection object from a dbSNP JSON file.

Convert each line in the file into a JSON object and perform various
checks/calculations. Each line becomes one RSEntry object.

Parameters:
fn: Filename to load
quiet: Whether to disable log messages

◆ from_gnomad()

def RSEntry.RSCollection.from_gnomad (   fn,
  quiet = False 
)
static
Create a new RSCollection object from a gnomAD VCF file.

Parameters:
fn: Filename to load
quiet: Whether to disable log messages

◆ get_by_chr()

def RSEntry.RSCollection.get_by_chr (   self,
  chrom 
)
Return a RSCollection object containing all entries from given chrom.

Parameters:
chrom: Chromosome to get

◆ get_by_chr_pos()

def RSEntry.RSCollection.get_by_chr_pos (   self,
  chrom,
  pos 
)
Return a list of all entries with given chromosome and position.

Parameters:
chrom: Chromosome to get
pos: Position to get

◆ get_by_rsid()

def RSEntry.RSCollection.get_by_rsid (   self,
  rsid 
)
Return a list of all entries with given RefSNP ID number.

Parameters:
rsid: RefSNP ID number to get

◆ get_chrom_from_filename()

def RSEntry.RSCollection.get_chrom_from_filename (   fn)
static
Helper method to parse a filename and find which chromosome it is.

Parameters:
fn: Filename to use

◆ get_major()

def RSEntry.RSCollection.get_major (   self,
  mut = True 
)
Return a RSCollection object containing all major alleles.

Parameters:
mut: Only include variants that are different between reference and
    alternate allele

◆ load_from_file_by_chr_pos()

def RSEntry.RSCollection.load_from_file_by_chr_pos (   fn,
  idx_file,
  c,
  pos,
  chunk_idx_dict = None,
  ret_chunk = False 
)
static
Find an entry in the given file using given chromosome and position.

If ret_chunk is True, return both the (rsid, chrom, pos) of the matching
entry and the RSCollection chunk containing the entry. Otherwise, return
just the matching entry.

Parameters:
fn: Filename to use
idx_file: Filename of the index file (will default to {fn}.idx)
c: Chromosome to load
pos: Position to load
chunk_idx_dict: Dictionary mapping (rsid,chrom,pos) combinations to the
    chunk in the ConsDB file containing that entry
ret_chunk: Whether to return the entire chunk loaded (as opposed to only
    returning the RSEntry object)

◆ load_from_file_by_rsid()

def RSEntry.RSCollection.load_from_file_by_rsid (   fn,
  idx_file,
  rsid,
  chunk_idx_dict = None,
  ret_chunk = False 
)
static
Find an entry in the given file using given rsid.

If ret_chunk is True, return both the (rsid, chrom, pos) of the matching
entry and the RSCollection chunk containing the entry. Otherwise, return
just the matching entry.

Use temporary files for compatibility with existing functions that take
filename arguments.

Parameters:
fn: Filename to load
idx_file: Filename of the index file (will default to {fn}.idx)
rsid: RefSNP ID to load
chunk_idx_dict: Dictionary mapping (rsid,chrom,pos) combinations to the
    chunk in the ConsDB file containing that entry
ret_chunk: Whether to return the entire chunk loaded (as opposed to only
    returning the RSEntry object)

◆ load_from_file_full()

def RSEntry.RSCollection.load_from_file_full (   fn)
static
Load an entire ConsDB file.

Parameters:
fn: Filename to load

◆ load_from_file_pops()

def RSEntry.RSCollection.load_from_file_pops (   fn,
  pops,
  cons = False 
)
static
Load only variants that are major variants for the given populations (pops).

Parameters:
fn: Filename to load
pops: Populations to load
cons: RSCollection object is being used for a consensus
    (will only load one allele for each position in this case)

◆ make_chunk_idx_dict()

def RSEntry.RSCollection.make_chunk_idx_dict (   idx_fn,
  key_fields = [1,2] 
)
static
Make a dict representation of the idx file.
Useful when indexing into a file multiple times in order to avoid
reconstructing this same dict every time.

Use fields [1,2] if loading by chrom, pos. Use field 0 if loading by
RSID.

Parameters:
idx_fn: Filename of the ConsDB index file
key_fields: Which fields in the (rsid,chrom,pos) combination are to be
    used as the keys in the dict

◆ merge_files()

def RSEntry.RSCollection.merge_files (   fn_list,
  out_fn,
  c = None 
)
static
Merge multiple ConsDB files into one.

Parameters:
fn_list: List of filenames to merge
out_fn: Filename to use for the resulting merged file
c: Chromosome of the input files

◆ open()

def RSEntry.RSCollection.open (   fn)
static
Helper method to appropriately open the given file.

Parameters:
fn: Filename to open

◆ parse_file_line()

def RSEntry.RSCollection.parse_file_line (   d)
static
Create an RSEntry object from a given line of a ConsDB file.

Parameters:
d: List containing the (rsid,chrom,pos) combination as the first entry
    and the rest of the entry string as the second entry (can be created
    by splitting the RSEntry string by ':')

◆ sort_rsidx()

def RSEntry.RSCollection.sort_rsidx (   rsidx)
static
Helper method to sort a list of rsidx. Meant to be used as a
key for sorting. Returns the chromosome and position of the rsidx, both
in int form.

Parameters:
rsidx: List/tuple of (rsid, chrom, pos)

◆ sort_rsidx_line()

def RSEntry.RSCollection.sort_rsidx_line (   line_split)
static
Helper method to sort lines from a ConsDB file. Meant to be used as a
key for sorting. Returns the chromosome and position of the line, both
in int form.

Parameters:
line_split: List consisting of ['rsid,chrom,pos', 'rest of line']

The documentation for this class was generated from the following file: