Files
nignoggobot/commands/medchem.go
2025-06-25 11:43:55 +02:00

610 lines
16 KiB
Go
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package commands
import (
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"time"

	tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
)
// MedchemCommand implements the /medchem bot command. It has no fields, so
// the zero value is ready to use.
type MedchemCommand struct{}
// Name returns the command keyword, "medchem".
func (m MedchemCommand) Name() string {
	const keyword = "medchem"
	return keyword
}
// Help returns a one-line description of the command together with its usage
// syntax.
func (m MedchemCommand) Help() string {
	const usage = "Get comprehensive medicinal chemistry properties for compounds. Usage: /medchem <compound name>"
	return usage
}
// PubChem JSON structures

// PubChemCompound mirrors the parts of one entry of the "PC_Compounds" array
// (see PubChemResponse) that this file decodes.
type PubChemCompound struct {
// ID wraps the compound identifier; the CID sits two levels down.
ID struct {
ID struct {
CID int `json:"cid"`
} `json:"id"`
} `json:"id"`
// Props lists the properties attached to the record. parsePubChemData
// identifies each entry by URN.Label (and, for some properties, URN.Name)
// and then reads one of the Value fields.
Props []struct {
// URN names the property. Only Label and Name are read in this file.
URN struct {
Label string `json:"label"`
Name string `json:"name,omitempty"`
DataType int `json:"datatype"`
Version string `json:"version,omitempty"`
Software string `json:"software,omitempty"`
Source string `json:"source,omitempty"`
Release string `json:"release,omitempty"`
} `json:"urn"`
// Value carries the property payload. parsePubChemData reads SVal, FVal
// or IVal depending on the property; Binary is decoded but not used here.
Value struct {
IVal int `json:"ival,omitempty"`
FVal float64 `json:"fval,omitempty"`
SVal string `json:"sval,omitempty"`
Binary string `json:"binary,omitempty"`
} `json:"value"`
} `json:"props"`
// Atoms holds per-atom arrays; len(AID) is used as the total atom count.
Atoms struct {
AID []int `json:"aid"`
Element []int `json:"element"`
} `json:"atoms,omitempty"`
// Bonds holds per-bond arrays; len(AID1) is used as the bond count.
Bonds struct {
AID1 []int `json:"aid1"`
AID2 []int `json:"aid2"`
Order []int `json:"order"`
} `json:"bonds,omitempty"`
// Count holds summary counts; only the heavy-atom count is decoded.
Count struct {
HeavyAtom int `json:"heavy_atom"`
} `json:"count,omitempty"`
}
// PubChemResponse is the top-level JSON object returned by the compound
// record endpoint. Callers in this file use only the first element of
// PCCompounds.
type PubChemResponse struct {
PCCompounds []PubChemCompound `json:"PC_Compounds"`
}
// PubChemSearchResponse is the JSON object returned by the name-to-CID lookup
// endpoint. searchCompoundCID uses the first entry of IdentifierList.CID.
type PubChemSearchResponse struct {
IdentifierList struct {
CID []int `json:"CID"`
} `json:"IdentifierList"`
}
// CompoundData is the flattened set of properties that parsePubChemData
// extracts from a PubChemCompound and that the format* functions render.
// A property that was absent from the record is left at its zero value.
type CompoundData struct {
// CID is the PubChem compound identifier.
CID int
// Name is the display title: the user's query when looked up by name, or
// "CID <n>" when fetched by CID.
Name string
IUPACName string
MolecularFormula string
// MolecularWeight is rendered with a "g/mol" suffix.
MolecularWeight float64
ExactMass float64
// XLogP is taken from the "Log P"/"XLogP3" property.
XLogP float64
// TPSA is the topological polar surface area, rendered with an "Ų" suffix.
TPSA float64
Complexity float64
HBondDonors int
HBondAcceptors int
RotatableBonds int
InChI string
InChIKey string
CanonicalSMILES string
HeavyAtomCount int
// TotalAtomCount is the length of the record's atom ID list.
TotalAtomCount int
// BondCount is the length of the record's bond list.
BondCount int
}
// PropertyCategory selects which group of compound properties is rendered
// into a message caption.
type PropertyCategory int
// Categories understood by formatCompoundCaption. Values are assigned by
// iota, so CategoryBasic is the zero value.
const (
// CategoryBasic renders name, formula and mass information.
CategoryBasic PropertyCategory = iota
// CategoryADME renders ADME-related properties and the Lipinski analysis.
CategoryADME
// CategoryStructure renders atom/bond counts and the complexity score.
CategoryStructure
// CategoryIdentifiers renders CID, InChIKey, SMILES and InChI.
CategoryIdentifiers
)
// Execute handles a "/medchem <compound name>" message.
//
// With no argument it replies with a usage message. Otherwise it shows a
// "typing" indicator, looks the compound up via fetchCompoundData and sends
// the basic-properties card; lookup failures are reported to the chat by
// handleError. Updates that carry no message are ignored.
func (m MedchemCommand) Execute(update tgbotapi.Update, bot *tgbotapi.BotAPI) {
	// Callback queries and other non-message updates have a nil Message.
	if update.Message == nil {
		return
	}
	chatID := update.Message.Chat.ID

	query := strings.TrimSpace(update.Message.CommandArguments())
	if query == "" {
		msg := tgbotapi.NewMessage(chatID, "🧪 *Medchem Command*\n\nUsage: `/medchem <compound name>`\n\nExample: `/medchem aspirin`\n\nThis command provides comprehensive medicinal chemistry properties including ADME parameters, structural information, and molecular identifiers.")
		msg.ParseMode = "Markdown"
		if _, err := bot.Send(msg); err != nil {
			log.Printf("medchem: sending usage message: %v", err)
		}
		return
	}

	// Telegram answers a chat action with a bare boolean rather than a
	// Message, so it goes through Request; Send would report a decode error.
	if _, err := bot.Request(tgbotapi.NewChatAction(chatID, tgbotapi.ChatTyping)); err != nil {
		log.Printf("medchem: sending typing action: %v", err)
	}

	compound, err := fetchCompoundData(query)
	if err != nil {
		handleError(bot, chatID, err, query)
		return
	}
	sendCompoundInfo(bot, chatID, compound, CategoryBasic)
}
// fetchCompoundData resolves a compound name to a PubChem CID and returns the
// parsed record with Name set to the original query.
//
// The record download and parsing are delegated to fetchCompoundDataByCID so
// that the two lookup paths cannot drift apart.
//
// NOTE(review): every failure of the name search, including transport errors,
// is reported with a "compound not found" prefix, which handleError keys on.
func fetchCompoundData(query string) (*CompoundData, error) {
	// First, search for compound to get CID.
	cid, err := searchCompoundCID(query)
	if err != nil {
		return nil, fmt.Errorf("compound not found: %w", err)
	}

	compound, err := fetchCompoundDataByCID(cid)
	if err != nil {
		return nil, err
	}

	// fetchCompoundDataByCID titles the record "CID <n>"; show what the user
	// typed instead.
	compound.Name = query
	return compound, nil
}
func searchCompoundCID(query string) (int, error) {
url := fmt.Sprintf("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/%s/cids/JSON", strings.ReplaceAll(query, " ", "%20"))
resp, err := http.Get(url)
if err != nil {
return 0, err
}
defer resp.Body.Close()
if resp.StatusCode == 404 {
return 0, fmt.Errorf("compound '%s' not found", query)
}
if resp.StatusCode != 200 {
return 0, fmt.Errorf("search failed with status %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return 0, err
}
var searchResp PubChemSearchResponse
if err := json.Unmarshal(body, &searchResp); err != nil {
return 0, err
}
if len(searchResp.IdentifierList.CID) == 0 {
return 0, fmt.Errorf("no CID found for compound")
}
return searchResp.IdentifierList.CID[0], nil
}
// parsePubChemData flattens a PubChem compound record into a CompoundData.
//
// originalName becomes the Name of the result. Properties are matched on the
// URN label and, where the label is shared or qualified, on the URN name.
// Anything not present in the record keeps its zero value.
func parsePubChemData(compound *PubChemCompound, originalName string) *CompoundData {
	out := &CompoundData{
		CID:  compound.ID.ID.CID,
		Name: originalName,
	}

	for i := range compound.Props {
		urn := compound.Props[i].URN
		val := compound.Props[i].Value

		switch urn.Label {
		case "Molecular Formula":
			out.MolecularFormula = val.SVal
		case "Molecular Weight":
			// Delivered as a string; an unparsable value is ignored.
			if w, err := strconv.ParseFloat(val.SVal, 64); err == nil {
				out.MolecularWeight = w
			}
		case "Compound Complexity":
			out.Complexity = val.FVal
		case "IUPAC Name":
			if urn.Name == "Preferred" {
				out.IUPACName = val.SVal
			}
		case "Mass":
			if urn.Name == "Exact" {
				if m, err := strconv.ParseFloat(val.SVal, 64); err == nil {
					out.ExactMass = m
				}
			}
		case "Log P":
			if urn.Name == "XLogP3" {
				out.XLogP = val.FVal
			}
		case "Topological":
			if urn.Name == "Polar Surface Area" {
				out.TPSA = val.FVal
			}
		case "InChI":
			if urn.Name == "Standard" {
				out.InChI = val.SVal
			}
		case "InChIKey":
			if urn.Name == "Standard" {
				out.InChIKey = val.SVal
			}
		case "SMILES":
			if urn.Name == "Canonical" {
				out.CanonicalSMILES = val.SVal
			}
		case "Count":
			switch urn.Name {
			case "Hydrogen Bond Donor":
				out.HBondDonors = val.IVal
			case "Hydrogen Bond Acceptor":
				out.HBondAcceptors = val.IVal
			case "Rotatable Bond":
				out.RotatableBonds = val.IVal
			}
		}
	}

	// Atom and bond counts come from the record's structural arrays.
	if heavy := compound.Count.HeavyAtom; heavy > 0 {
		out.HeavyAtomCount = heavy
	}
	out.TotalAtomCount = len(compound.Atoms.AID)
	out.BondCount = len(compound.Bonds.AID1)

	return out
}
// sendCompoundInfo sends the compound's PubChem structure image to chatID,
// captioned with the given property category and carrying the category
// navigation keyboard. A failed send is logged rather than silently dropped.
func sendCompoundInfo(bot *tgbotapi.BotAPI, chatID int64, compound *CompoundData, category PropertyCategory) {
	imageURL := fmt.Sprintf("https://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?t=l&cid=%d", compound.CID)

	photo := tgbotapi.NewPhoto(chatID, tgbotapi.FileURL(imageURL))
	photo.Caption = formatCompoundCaption(compound, category)
	photo.ParseMode = "Markdown"
	photo.ReplyMarkup = createNavigationKeyboard(compound.CID, category)

	if _, err := bot.Send(photo); err != nil {
		log.Printf("medchem: sending compound card for CID %d: %v", compound.CID, err)
	}
}
// formatCompoundCaption renders the caption for the requested category.
// CategoryBasic and any unrecognised category produce the basic-properties
// caption.
func formatCompoundCaption(compound *CompoundData, category PropertyCategory) string {
	switch category {
	case CategoryADME:
		return formatADMEInfo(compound)
	case CategoryStructure:
		return formatStructureInfo(compound)
	case CategoryIdentifiers:
		return formatIdentifiersInfo(compound)
	}
	return formatBasicInfo(compound)
}
// formatBasicInfo renders the "Basic Properties" caption: title, IUPAC name,
// formula, molecular weight, exact mass and a link to the PubChem page.
// Empty or non-positive fields are omitted.
//
// The IUPAC name is clipped to maxIUPACRunes characters. Telegram limits
// photo captions to 1024 characters, and systematic names of large molecules
// can exceed that on their own, which would make the whole message fail.
func formatBasicInfo(c *CompoundData) string {
	const maxIUPACRunes = 512

	var b strings.Builder
	fmt.Fprintf(&b, "🧪 *%s*\n", c.Name)
	b.WriteString("📋 *Basic Properties*\n\n")

	if c.IUPACName != "" {
		name := c.IUPACName
		// Clip on runes so a multi-byte character is never cut in half.
		if r := []rune(name); len(r) > maxIUPACRunes {
			name = string(r[:maxIUPACRunes-3]) + "..."
		}
		fmt.Fprintf(&b, "*IUPAC Name:* %s\n", name)
	}
	if c.MolecularFormula != "" {
		fmt.Fprintf(&b, "*Formula:* `%s`\n", c.MolecularFormula)
	}
	if c.MolecularWeight > 0 {
		fmt.Fprintf(&b, "*Molecular Weight:* %.2f g/mol\n", c.MolecularWeight)
	}
	if c.ExactMass > 0 {
		fmt.Fprintf(&b, "*Exact Mass:* %.6f\n", c.ExactMass)
	}

	fmt.Fprintf(&b, "\n🔗 [PubChem](https://pubchem.ncbi.nlm.nih.gov/compound/%d)", c.CID)
	return b.String()
}
// formatADMEInfo renders the "ADME Properties" caption: XLogP, TPSA, hydrogen
// bond donor/acceptor counts and rotatable bonds, followed by a Lipinski
// Rule of Five checklist and a violation summary.
//
// XLogP is omitted when it is exactly zero and TPSA when it is not positive;
// the three counts are printed whenever they are non-negative.
func formatADMEInfo(c *CompoundData) string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "🧪 *%s*\n", c.Name)
	sb.WriteString("💊 *ADME Properties*\n\n")

	if c.XLogP != 0 {
		fmt.Fprintf(&sb, "*XLogP:* %.2f\n", c.XLogP)
		sb.WriteString("├ Lipophilicity indicator\n")
	}
	if c.TPSA > 0 {
		fmt.Fprintf(&sb, "*TPSA:* %.1f Ų\n", c.TPSA)
		sb.WriteString("├ Topological Polar Surface Area\n")
	}
	if c.HBondDonors >= 0 {
		fmt.Fprintf(&sb, "*H-bond Donors:* %d\n", c.HBondDonors)
	}
	if c.HBondAcceptors >= 0 {
		fmt.Fprintf(&sb, "*H-bond Acceptors:* %d\n", c.HBondAcceptors)
	}
	if c.RotatableBonds >= 0 {
		fmt.Fprintf(&sb, "*Rotatable Bonds:* %d\n", c.RotatableBonds)
		sb.WriteString("├ Flexibility indicator\n")
	}

	// Lipinski's Rule of Five: one checklist line per criterion.
	sb.WriteString("\n📊 *Lipinski's Rule of Five:*\n")
	rules := []struct {
		violated   bool
		fail, pass string
	}{
		{c.MolecularWeight > 500, "❌ MW > 500\n", "✅ MW ≤ 500\n"},
		{c.XLogP > 5, "❌ XLogP > 5\n", "✅ XLogP ≤ 5\n"},
		{c.HBondDonors > 5, "❌ HBD > 5\n", "✅ HBD ≤ 5\n"},
		{c.HBondAcceptors > 10, "❌ HBA > 10\n", "✅ HBA ≤ 10\n"},
	}
	violations := 0
	for _, rule := range rules {
		if rule.violated {
			violations++
			sb.WriteString(rule.fail)
			continue
		}
		sb.WriteString(rule.pass)
	}

	if violations > 0 {
		fmt.Fprintf(&sb, "\n⚠ *%d violation(s)*", violations)
	} else {
		sb.WriteString("\n🎯 *Drug-like* (0 violations)")
	}
	return sb.String()
}
// formatStructureInfo renders the "Structural Properties" caption: complexity
// score, heavy/total atom counts, bond count and rotatable bonds, followed by
// a four-level complexity assessment when a positive score is available.
func formatStructureInfo(c *CompoundData) string {
	var out strings.Builder
	fmt.Fprintf(&out, "🧪 *%s*\n", c.Name)
	out.WriteString("🏗️ *Structural Properties*\n\n")

	scored := c.Complexity > 0
	if scored {
		fmt.Fprintf(&out, "*Complexity:* %.0f\n", c.Complexity)
		out.WriteString("├ Structural complexity score\n")
	}

	// Atom and bond counts need a positive value to be shown; rotatable
	// bonds are shown from zero upwards.
	counts := []struct {
		label string
		value int
		show  bool
	}{
		{"Heavy Atoms", c.HeavyAtomCount, c.HeavyAtomCount > 0},
		{"Total Atoms", c.TotalAtomCount, c.TotalAtomCount > 0},
		{"Bonds", c.BondCount, c.BondCount > 0},
		{"Rotatable Bonds", c.RotatableBonds, c.RotatableBonds >= 0},
	}
	for _, row := range counts {
		if row.show {
			fmt.Fprintf(&out, "*%s:* %d\n", row.label, row.value)
		}
	}

	if scored {
		out.WriteString("\n📈 *Complexity Assessment:*\n")
		switch {
		case c.Complexity < 100:
			out.WriteString("🟢 Simple structure")
		case c.Complexity < 300:
			out.WriteString("🟡 Moderate complexity")
		case c.Complexity < 500:
			out.WriteString("🟠 Complex structure")
		default:
			out.WriteString("🔴 Highly complex")
		}
	}
	return out.String()
}
// formatIdentifiersInfo renders the "Chemical Identifiers" caption: CID,
// InChIKey, SMILES and InChI. Empty identifiers are omitted.
//
// Both SMILES and InChI grow with molecule size, so each is clipped to
// maxFieldLen bytes (ending in "...") to keep the caption within Telegram's
// 1024-character limit. The InChIKey is fixed-length and is left as is.
func formatIdentifiersInfo(c *CompoundData) string {
	const maxFieldLen = 200

	clip := func(s string) string {
		if len(s) <= maxFieldLen {
			return s
		}
		cut := maxFieldLen - 3
		// Back off to a rune boundary so a multi-byte UTF-8 sequence is
		// never split (continuation bytes look like 10xxxxxx).
		for cut > 0 && s[cut]&0xC0 == 0x80 {
			cut--
		}
		return s[:cut] + "..."
	}

	var b strings.Builder
	fmt.Fprintf(&b, "🧪 *%s*\n", c.Name)
	b.WriteString("🏷️ *Chemical Identifiers*\n\n")
	fmt.Fprintf(&b, "*CID:* `%d`\n", c.CID)

	if c.InChIKey != "" {
		fmt.Fprintf(&b, "*InChIKey:*\n`%s`\n\n", c.InChIKey)
	}
	if c.CanonicalSMILES != "" {
		fmt.Fprintf(&b, "*SMILES:*\n`%s`\n\n", clip(c.CanonicalSMILES))
	}
	if c.InChI != "" {
		fmt.Fprintf(&b, "*InChI:*\n`%s`", clip(c.InChI))
	}
	return b.String()
}
// createNavigationKeyboard builds the two-row inline keyboard used to switch
// between property categories. Each button carries callback data of the form
// "medchem:<cid>:<category>", and the button for currentCategory is marked as
// active by getButtonText.
func createNavigationKeyboard(cid int, currentCategory PropertyCategory) tgbotapi.InlineKeyboardMarkup {
	button := func(emoji, slug string, category PropertyCategory) tgbotapi.InlineKeyboardButton {
		label := getButtonText(emoji, currentCategory == category)
		data := fmt.Sprintf("medchem:%d:%s", cid, slug)
		return tgbotapi.NewInlineKeyboardButtonData(label, data)
	}

	rows := [][]tgbotapi.InlineKeyboardButton{
		{
			button("📋", "basic", CategoryBasic),
			button("💊", "adme", CategoryADME),
		},
		{
			button("🏗️", "structure", CategoryStructure),
			button("🏷️", "identifiers", CategoryIdentifiers),
		},
	}
	return tgbotapi.NewInlineKeyboardMarkup(rows...)
}
// getButtonText returns the label for a category button: the emoji alone, or
// the emoji followed by " ●" when the button represents the active category.
func getButtonText(emoji string, isActive bool) string {
	label := emoji
	if isActive {
		label += " ●"
	}
	return label
}
// handleError reports a failed compound lookup to the chat.
//
// The error text decides the message: "not found" yields a not-found notice
// (with autocomplete suggestions from getSuggestions when there are any),
// "network" yields a network notice, and anything else a generic notice.
//
// The message is sent in Telegram's legacy Markdown mode, so the user's query
// and the suggestions are escaped first; otherwise a stray '_', '*', '`' or
// '[' makes Telegram reject the message and the user gets no reply at all.
func handleError(bot *tgbotapi.BotAPI, chatID int64, err error, query string) {
	// Legacy Markdown escapes these four characters with a backslash when
	// they occur outside an entity, which is where both values are placed.
	escape := strings.NewReplacer("_", "\\_", "*", "\\*", "`", "\\`", "[", "\\[").Replace
	safeQuery := escape(query)
	reason := err.Error()

	var message string
	switch {
	case strings.Contains(reason, "not found"):
		// Try to get suggestions.
		suggestions := getSuggestions(query)
		if len(suggestions) > 0 {
			escaped := make([]string, 0, len(suggestions))
			for _, s := range suggestions {
				escaped = append(escaped, escape(s))
			}
			message = fmt.Sprintf("❌ Compound '%s' not found.\n\n💡 *Did you mean:*\n%s",
				safeQuery, strings.Join(escaped, "\n"))
		} else {
			message = fmt.Sprintf("❌ Compound '%s' not found.\n\n💡 *Try:*\n• Check spelling\n• Use common name or IUPAC name\n• Try synonyms (e.g., 'aspirin' instead of 'acetylsalicylic acid')", safeQuery)
		}
	case strings.Contains(reason, "network"):
		message = "🌐 Network error. Please try again later."
	default:
		message = "⚠️ An error occurred while fetching compound data. Please try again."
	}

	msg := tgbotapi.NewMessage(chatID, message)
	msg.ParseMode = "Markdown"
	if _, sendErr := bot.Send(msg); sendErr != nil {
		log.Printf("medchem: sending error message for %q: %v", query, sendErr)
	}
}
// getSuggestions asks PubChem's autocomplete service for up to three compound
// names resembling query and returns them as bullet lines ("• name").
//
// It is best-effort: any request, status or decoding problem yields nil.
func getSuggestions(query string) []string {
	// The query is user input: escape it as a single path segment so that
	// characters such as '/', '?', '#' or '%' cannot alter the request URL.
	endpoint := "https://pubchem.ncbi.nlm.nih.gov/rest/autocomplete/compound/" + url.PathEscape(query) + "/json?limit=3"

	// The default client never times out; bound the whole exchange so a
	// stalled suggestion lookup cannot hold up the error reply.
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Get(endpoint)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil
	}

	var data struct {
		Dictionary struct {
			Terms []string `json:"terms"`
		} `json:"dictionary"`
	}
	if err := json.Unmarshal(body, &data); err != nil {
		return nil
	}

	var suggestions []string
	for _, term := range data.Dictionary.Terms {
		suggestions = append(suggestions, "• "+term)
	}
	return suggestions
}
// HandleCallback handles inline keyboard button presses for medchem.
//
// params is expected to hold the CID and the category name ("basic", "adme",
// "structure" or "identifiers"), i.e. the parts of the callback data after
// the "medchem" prefix. The compound is re-fetched by CID and the caption and
// keyboard of the originating message are edited in place.
//
// Every path that has a callback query answers it, because Telegram clients
// keep showing a progress indicator on the button until it is answered.
func (m MedchemCommand) HandleCallback(update tgbotapi.Update, bot *tgbotapi.BotAPI, params []string) {
	query := update.CallbackQuery
	if query == nil {
		log.Printf("medchem callback received without a callback query")
		return
	}

	// answer acknowledges the button press; a non-empty text is shown to the
	// user as a notification.
	answer := func(text string) {
		if _, err := bot.Request(tgbotapi.NewCallback(query.ID, text)); err != nil {
			log.Printf("Error answering medchem callback: %v", err)
		}
	}

	if len(params) < 2 {
		log.Printf("Invalid medchem callback params: %v", params)
		answer("")
		return
	}
	cidStr, categoryStr := params[0], params[1]

	cid, err := strconv.Atoi(cidStr)
	if err != nil {
		log.Printf("Invalid CID in callback: %s", cidStr)
		answer("")
		return
	}

	var category PropertyCategory
	switch categoryStr {
	case "basic":
		category = CategoryBasic
	case "adme":
		category = CategoryADME
	case "structure":
		category = CategoryStructure
	case "identifiers":
		category = CategoryIdentifiers
	default:
		log.Printf("Invalid category in callback: %s", categoryStr)
		answer("")
		return
	}

	// Without the originating message there is nothing to edit.
	if query.Message == nil {
		log.Printf("medchem callback for CID %d has no message to edit", cid)
		answer("")
		return
	}

	// Get compound data by CID.
	compound, err := fetchCompoundDataByCID(cid)
	if err != nil {
		log.Printf("Error fetching compound data for CID %d: %v", cid, err)
		answer("Error loading compound data")
		return
	}

	// The callback data carries only the CID, so the fetched record is titled
	// "CID <n>". Every caption produced by this command starts with a
	// "🧪 <name>" line; reuse that name so the title stays stable across
	// button presses. If the line does not have that shape, keep the CID title.
	firstLine := strings.SplitN(query.Message.Caption, "\n", 2)[0]
	if strings.HasPrefix(firstLine, "🧪") {
		if title := strings.TrimSpace(strings.TrimPrefix(firstLine, "🧪")); title != "" {
			compound.Name = title
		}
	}

	// Edit the message caption and keyboard.
	keyboard := createNavigationKeyboard(compound.CID, category)
	editCaption := tgbotapi.NewEditMessageCaption(
		query.Message.Chat.ID,
		query.Message.MessageID,
		formatCompoundCaption(compound, category),
	)
	editCaption.ParseMode = "Markdown"
	editCaption.ReplyMarkup = &keyboard

	if _, err := bot.Send(editCaption); err != nil {
		log.Printf("Error editing message caption: %v", err)
	}
	answer("")
}
// fetchCompoundDataByCID fetches compound data directly by CID.
//
// It downloads the full PubChem record for cid, decodes it and returns the
// first compound parsed into a CompoundData whose Name is "CID <cid>".
// Transport failures are reported with a "network error" prefix, which
// handleError relies on to pick its message.
func fetchCompoundDataByCID(cid int) (*CompoundData, error) {
	endpoint := fmt.Sprintf("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/%d/record/JSON", cid)

	// The default client never times out; bound the whole exchange
	// (connect, headers and body) so a stalled server cannot block the
	// handler indefinitely.
	client := &http.Client{Timeout: 15 * time.Second}
	resp, err := client.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("network error: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("PubChem API error: status %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response: %w", err)
	}

	var pubchemResp PubChemResponse
	if err := json.Unmarshal(body, &pubchemResp); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	if len(pubchemResp.PCCompounds) == 0 {
		return nil, fmt.Errorf("no compound data found")
	}

	return parsePubChemData(&pubchemResp.PCCompounds[0], fmt.Sprintf("CID %d", cid)), nil
}
func init() {
Register(MedchemCommand{})
}