# Source code for proteinshake.datasets.enzyme_commission
import json
from proteinshake.datasets import RCSBDataset
class EnzymeCommissionDataset(RCSBDataset):
    """ Enzymes with annotated enzyme commission (EC) numbers.

    .. admonition:: Please cite

      Berman, H M et al. “The Protein Data Bank.” Nucleic acids research vol. 28,1 (2000): 235-42. doi:10.1093/nar/28.1.235

    .. admonition:: Source

      Raw data was obtained and modified from `RCSB Protein Data Bank <https://www.rcsb.org/>`_, originally licensed under `CC0 1.0 <https://creativecommons.org/publicdomain/zero/1.0/>`_.

    .. list-table:: Dataset stats
       :widths: 100
       :header-rows: 1

       * - # proteins
       * - 15603

    .. list-table:: Annotations
       :widths: 25 35 45
       :header-rows: 1

       * - Attribute
         - Key
         - Sample value
       * - Enzyme Commission
         - :code:`protein['protein']['EC']`
         - :code:`'2.7.7.4'`
    """

    # Short label for this dataset.
    description = 'Enzymes'

    # NOTE: the `query` default is a list literal, so the same list object is
    # shared by every call that relies on the default. It is only forwarded
    # here, never mutated. The literal is kept as-is so that the signature's
    # default value stays exactly what callers (and any code inspecting the
    # signature) already see.
    def __init__(self, query=[['rcsb_polymer_entity.rcsb_ec_lineage.name','exists']], **kwargs):
        """
        Args:
            query: REST-API query. The default asks for entries where
                ``rcsb_polymer_entity.rcsb_ec_lineage.name`` exists.
            **kwargs: Forwarded unchanged to ``RCSBDataset.__init__``.
        """
        super().__init__(query=query, **kwargs)

    def add_protein_attributes(self, protein):
        """ Fetch the enzyme class for each protein.

        Reads ``{self.root}/raw/files/{ID}.annot.json`` (``ID`` taken from
        ``protein['protein']['ID']``) and stores the ``id`` of the last
        ``rcsb_ec_lineage`` entry under ``protein['protein']['EC']``.

        Args:
            protein: Protein dictionary; it is modified in place.

        Returns:
            The same ``protein`` dictionary, now carrying the ``EC`` key.

        Raises:
            FileNotFoundError: If the annotation file does not exist.
            KeyError, IndexError: If the annotation lacks a non-empty
                ``rcsb_polymer_entity.rcsb_ec_lineage`` list.
        """
        annot_path = f'{self.root}/raw/files/{protein["protein"]["ID"]}.annot.json'
        # JSON is decoded explicitly as UTF-8 so the result does not depend on
        # the platform's locale encoding.
        with open(annot_path, 'r', encoding='utf-8') as file:
            annot = json.load(file)
        lineage = annot['rcsb_polymer_entity']['rcsb_ec_lineage']
        # The last lineage entry is used; presumably the most specific EC
        # level (cf. sample value '2.7.7.4') -- confirm against the RCSB schema.
        protein['protein']['EC'] = lineage[-1]['id']
        return protein