"""Simplified AlphaFold Pipeline.

Basic implementation for protein structure analysis using AlphaFold. This code
shows how to access, analyze, and visualize structural predictions.
"""
import requests
import numpy as np
from Bio.PDB import PDBParser, DSSP
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
class AlphaFoldAnalyzer:
    """Download AlphaFold structure files and run simple analyses on them.

    The analyzer fetches PDB files from the AlphaFold file server, reads the
    per-residue pLDDT confidence stored in the B-factor column of C-alpha
    atoms, builds C-alpha distance/contact maps, summarizes DSSP secondary
    structure into three states, and plots the confidence profile.
    """

    # DSSP codes collapsed into helix ("H") and strand ("E"); every other
    # code is reported as coil ("C").
    _SS_CLASSES = {"H": "H", "G": "H", "I": "H", "B": "E", "E": "E"}

    def __init__(self):
        self.base_url = "https://alphafold.ebi.ac.uk/files/"
        self.parser = PDBParser(QUIET=True)

    def download_structure(self, uniprot_id, model_version=4, timeout=30):
        """Download the AlphaFold structure for a UniProt ID.

        Args:
            uniprot_id: UniProt accession used to build the file URL.
            model_version: Model version number placed in the file name
                (``..._v<model_version>.pdb``). Defaults to 4.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the caller forever.

        Returns:
            Path of the PDB file written to the current directory
            (``AF-<uniprot_id>.pdb``).

        Raises:
            Exception: If the server does not answer with HTTP 200.
        """
        url = f"{self.base_url}AF-{uniprot_id}-F1-model_v{model_version}.pdb"
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            raise Exception(f"Structure not found for {uniprot_id}")

        output_path = f"AF-{uniprot_id}.pdb"
        with open(output_path, "wb") as f:
            f.write(response.content)
        return output_path

    def _iter_ca_atoms(self, pdb_file):
        """Yield ``(residue, ca_atom)`` for each residue that has a CA atom."""
        structure = self.parser.get_structure("protein", pdb_file)
        for model in structure:
            for chain in model:
                for residue in chain:
                    if residue.has_id("CA"):
                        yield residue, residue["CA"]

    def parse_confidence_scores(self, pdb_file):
        """Extract pLDDT confidence scores from the B-factor column.

        Only residues with a C-alpha atom contribute; the score is read from
        that atom's B-factor.

        Args:
            pdb_file: Path of the PDB file to parse.

        Returns:
            Tuple ``(residue_numbers, confidences)`` of NumPy arrays with one
            entry per residue that has a CA atom.
        """
        residue_numbers = []
        confidences = []
        for residue, ca_atom in self._iter_ca_atoms(pdb_file):
            residue_numbers.append(residue.id[1])
            confidences.append(ca_atom.bfactor)
        return np.array(residue_numbers), np.array(confidences)

    def calculate_contact_map(self, pdb_file, cutoff=8.0):
        """Calculate the C-alpha distance matrix and residue contact map.

        Args:
            pdb_file: Path of the PDB file to parse.
            cutoff: Two residues are in contact when their CA-CA distance is
                strictly below this value.

        Returns:
            Tuple ``(distances, contact_map)``: a square matrix of pairwise
            CA-CA distances and a boolean matrix of the same shape. Both have
            shape ``(0, 0)`` when the file contains no CA atoms.
        """
        coords = [ca_atom.coord for _, ca_atom in self._iter_ca_atoms(pdb_file)]
        if not coords:
            # pdist rejects an empty coordinate list, so report "no residues"
            # explicitly instead of surfacing an unrelated ValueError.
            return np.zeros((0, 0)), np.zeros((0, 0), dtype=bool)

        distances = squareform(pdist(np.array(coords)))
        contact_map = distances < cutoff
        return distances, contact_map

    def analyze_secondary_structure(self, pdb_file):
        """Analyze secondary structure using DSSP.

        DSSP codes H/G/I are reported as helix ("H"), B/E as sheet ("E"), and
        everything else as coil ("C").

        Args:
            pdb_file: Path of the PDB file to analyze.

        Returns:
            Tuple ``(ss_sequence, ss_counts)``: the three-state string and a
            dict with the number of "H", "E" and "C" residues. Returns
            ``(None, None)`` when the analysis fails for any reason.
        """
        try:
            structure = self.parser.get_structure("protein", pdb_file)
            dssp = DSSP(structure[0], pdb_file)
            # Index 2 of each DSSP record holds the secondary-structure code.
            ss_sequence = "".join(
                self._SS_CLASSES.get(record[2], "C") for record in dssp
            )
            ss_counts = {state: ss_sequence.count(state) for state in "HEC"}
            return ss_sequence, ss_counts
        except Exception as e:
            # Deliberately best-effort: a failed DSSP run is reported and the
            # caller continues without secondary-structure data.
            print(f"DSSP analysis failed: {e}")
            return None, None

    @staticmethod
    def _confidence_color(score):
        """Return the marker color for one pLDDT score."""
        if score < 50:
            return 'red'
        if score < 70:
            return 'orange'
        if score < 90:
            return 'yellow'
        return 'blue'

    def plot_confidence_profile(self, residue_numbers, confidences):
        """Plot the confidence score profile and display it.

        Args:
            residue_numbers: Residue numbers for the x axis.
            confidences: pLDDT scores for the y axis, same length.

        Returns:
            The figure that was plotted.
        """
        # Keep our own handle on the figure: asking pyplot for the "current"
        # figure after show() can hand back a new, empty one.
        fig, ax = plt.subplots(figsize=(12, 6))

        colors = [self._confidence_color(c) for c in confidences]
        ax.scatter(residue_numbers, confidences, c=colors, alpha=0.7)
        ax.set_xlabel('Residue Number')
        ax.set_ylabel('pLDDT Confidence Score')
        ax.set_title('AlphaFold Confidence Profile')

        # Dashed guides at the boundaries between confidence bands.
        ax.axhline(y=90, color='blue', linestyle='--', alpha=0.5, label='Very High (>90)')
        ax.axhline(y=70, color='yellow', linestyle='--', alpha=0.5, label='Confident (70-90)')
        ax.axhline(y=50, color='orange', linestyle='--', alpha=0.5, label='Low (50-70)')

        ax.legend()
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        plt.show()
        return fig
# Example usage
def main():
    """Run the example workflow for a single protein and print the results.

    Downloads the structure, reports the mean confidence, the contact-map
    shape and (when available) the secondary-structure counts, then shows the
    confidence plot. Any failure along the way is printed rather than raised.
    """
    # Protein analyzed by this example.
    uniprot_id = "P04637"  # p53 tumor suppressor
    analyzer = AlphaFoldAnalyzer()

    try:
        # Fetch the predicted structure.
        structure_path = analyzer.download_structure(uniprot_id)
        print(f"Downloaded: {structure_path}")

        # Per-residue confidence.
        residue_ids, plddt_scores = analyzer.parse_confidence_scores(structure_path)
        mean_confidence = np.mean(plddt_scores)
        print(f"Average confidence: {mean_confidence:.1f}")

        # Residue contacts; only the boolean map is reported here.
        _, contacts = analyzer.calculate_contact_map(structure_path)
        print(f"Contact map shape: {contacts.shape}")

        # Secondary structure is optional: counts are None when DSSP fails.
        _, ss_counts = analyzer.analyze_secondary_structure(structure_path)
        if ss_counts:
            print(f"Secondary structure: {ss_counts}")

        # Confidence profile plot.
        analyzer.plot_confidence_profile(residue_ids, plddt_scores)
    except Exception as e:
        print(f"Error: {e}")


# Run the example only when the file is executed as a script.
if __name__ == "__main__":
    main()