From 335912ea8aab321af07f551f87a58c6187e6cabd Mon Sep 17 00:00:00 2001
From: bdnugget
Date: Wed, 25 Jun 2025 12:00:29 +0200
Subject: [PATCH] Medchem command improvements

---
Review notes (text between the "---" line and the diff is ignored by git-am):
 * filterCommonNames appends instead of prepending, so an earlier PubChem
   synonym is no longer pushed behind later ones when picking the display name.
 * fetchCommonNames wraps its errors with the CID; the IUPAC fallback is
   truncated by runes instead of bytes.
 * Hunk line counts and the diffstat are recomputed for the amended content;
   the commit id and blob ids still refer to the original export.
 * Indentation was reconstructed from a whitespace-collapsed copy; use
   "git apply --ignore-whitespace" if context lines do not match.

 commands/medchem.go | 145 +++++++++++++++++++++++++++++++++++++++++++-
 commands/mol.go     |   4 +-
 2 files changed, 144 insertions(+), 5 deletions(-)

diff --git a/commands/medchem.go b/commands/medchem.go
index b0eb3d0..62bc7b5 100644
--- a/commands/medchem.go
+++ b/commands/medchem.go
@@ -70,9 +70,20 @@ type PubChemSearchResponse struct {
 	} `json:"IdentifierList"`
 }
 
+// PubChemSynonymsResponse mirrors the JSON body of the PUG REST synonyms endpoint.
+type PubChemSynonymsResponse struct {
+	InformationList struct {
+		Information []struct {
+			CID     int      `json:"CID"`
+			Synonym []string `json:"Synonym"`
+		} `json:"Information"`
+	} `json:"InformationList"`
+}
+
 type CompoundData struct {
 	CID              int
 	Name             string
+	CommonNames      []string // Up to 3 display names chosen by filterCommonNames
 	IUPACName        string
 	MolecularFormula string
 	MolecularWeight  float64
@@ -156,6 +167,18 @@ func fetchCompoundData(query string) (*CompoundData, error) {
 	}
 
 	compound := parsePubChemData(&pubchemResp.PCCompounds[0], query)
+
+	// Fetch common names/synonyms
+	commonNames, err := fetchCommonNames(compound.CID)
+	if err == nil && len(commonNames) > 0 {
+		compound.CommonNames = commonNames
+		// Use most common name for display
+		compound.Name = fmt.Sprintf("%s (CID %d)", commonNames[0], compound.CID)
+	} else {
+		// Fallback to original query
+		compound.Name = fmt.Sprintf("%s (CID %d)", query, compound.CID)
+	}
+
 	return compound, nil
 }
 
@@ -192,6 +215,90 @@ func searchCompoundCID(query string) (int, error) {
 	return searchResp.IdentifierList.CID[0], nil
 }
 
+// fetchCommonNames retrieves the synonyms PubChem lists for cid and reduces
+// them to a few display-friendly names via filterCommonNames.
+//
+// NOTE: http.Get goes through http.DefaultClient, which sets no timeout.
+func fetchCommonNames(cid int) ([]string, error) {
+	url := fmt.Sprintf("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/%d/synonyms/JSON", cid)
+	resp, err := http.Get(url)
+	if err != nil {
+		return nil, fmt.Errorf("fetching synonyms for CID %d: %w", cid, err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("fetching synonyms for CID %d: unexpected status %s", cid, resp.Status)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("reading synonyms for CID %d: %w", cid, err)
+	}
+
+	var synonymsResp PubChemSynonymsResponse
+	if err := json.Unmarshal(body, &synonymsResp); err != nil {
+		return nil, fmt.Errorf("decoding synonyms for CID %d: %w", cid, err)
+	}
+
+	if len(synonymsResp.InformationList.Information) == 0 {
+		return nil, fmt.Errorf("no synonyms found for CID %d", cid)
+	}
+
+	synonyms := synonymsResp.InformationList.Information[0].Synonym
+	return filterCommonNames(synonyms), nil
+}
+
+// filterCommonNames picks at most three display-friendly names from synonyms.
+//
+// Candidates are taken in input order. Duplicates, names shorter than 3 or
+// longer than 40 bytes, and names containing one of the technical markers are
+// skipped. Names of at most 30 bytes without '[' or '(' are returned ahead of
+// the remaining ones; input order is kept within each group.
+func filterCommonNames(synonyms []string) []string {
+	const maxNames = 3
+
+	// Substrings that flag a synonym as too technical to show.
+	// NOTE: this is plain substring matching, so any name that merely
+	// contains one of them (e.g. "cas" or "zinc") is dropped as well.
+	technicalMarkers := []string{
+		"iupac", "cas", "einecs", "unii", "dtxsid", "pubchem",
+		"chembl", "zinc", "inchi", "smiles", "registry",
+	}
+	isTechnical := func(name string) bool {
+		lower := strings.ToLower(name)
+		for _, marker := range technicalMarkers {
+			if strings.Contains(lower, marker) {
+				return true
+			}
+		}
+		return false
+	}
+
+	var simple, other []string
+	seen := make(map[string]bool)
+
+	for _, synonym := range synonyms {
+		if len(simple)+len(other) >= maxNames {
+			break
+		}
+		if seen[synonym] || len(synonym) < 3 || len(synonym) > 40 || isTechnical(synonym) {
+			continue
+		}
+		seen[synonym] = true
+
+		// Append rather than prepend: prepending reversed the input order
+		// and let a later synonym take over the front slot.
+		if len(synonym) <= 30 && !strings.ContainsAny(synonym, "[(") {
+			simple = append(simple, synonym)
+		} else {
+			other = append(other, synonym)
+		}
+	}
+
+	return append(simple, other...)
+}
+
 func parsePubChemData(compound *PubChemCompound, originalName string) *CompoundData {
 	data := &CompoundData{
 		CID: compound.ID.ID.CID,
@@ -283,9 +390,22 @@ func formatBasicInfo(c *CompoundData) string {
 	fmt.Fprintf(b, "🧪 *%s*\n", c.Name)
 	fmt.Fprintf(b, "📋 *Basic Properties*\n\n")
 
-	if c.IUPACName != "" {
-		fmt.Fprintf(b, "*IUPAC Name:* %s\n", c.IUPACName)
+	// Show common names first
+	if len(c.CommonNames) > 0 {
+		fmt.Fprintf(b, "*Common Names:*\n")
+		for i, name := range c.CommonNames {
+			if i >= 3 { // Limit to 3
+				break
+			}
+			fmt.Fprintf(b, "• %s\n", name)
+		}
+		fmt.Fprintf(b, "\n")
 	}
+
+	if c.IUPACName != "" {
+		fmt.Fprintf(b, "*IUPAC Name:* %s\n\n", c.IUPACName)
+	}
+
 	if c.MolecularFormula != "" {
 		fmt.Fprintf(b, "*Formula:* `%s`\n", c.MolecularFormula)
 	}
@@ -600,7 +720,25 @@ func fetchCompoundDataByCID(cid int) (*CompoundData, error) {
 		return nil, fmt.Errorf("no compound data found")
 	}
 
-	compound := parsePubChemData(&pubchemResp.PCCompounds[0], fmt.Sprintf("CID %d", cid))
+	compound := parsePubChemData(&pubchemResp.PCCompounds[0], "")
+
+	// Fetch common names/synonyms
+	commonNames, err := fetchCommonNames(cid)
+	if err == nil && len(commonNames) > 0 {
+		compound.CommonNames = commonNames
+		// Use most common name for display
+		compound.Name = fmt.Sprintf("%s (CID %d)", commonNames[0], cid)
+	} else if compound.IUPACName != "" {
+		// Fallback to IUPAC name, truncated by runes (not bytes) if too long
+		name := compound.IUPACName
+		if runes := []rune(name); len(runes) > 50 {
+			name = string(runes[:47]) + "..."
+		}
+		compound.Name = fmt.Sprintf("%s (CID %d)", name, cid)
+	} else {
+		compound.Name = fmt.Sprintf("CID %d", cid)
+	}
+
 	return compound, nil
 }
 
diff --git a/commands/mol.go b/commands/mol.go
index bf48fb9..314f4a1 100644
--- a/commands/mol.go
+++ b/commands/mol.go
@@ -29,13 +29,13 @@ func (m MolCommand) Execute(update tgbotapi.Update, bot *tgbotapi.BotAPI) {
 	}
 	cid, err := fetchPubchemCID(args)
 	if err != nil {
-		msg := tgbotapi.NewMessage(update.Message.Chat.ID, "Structure not found")
+		msg := tgbotapi.NewMessage(update.Message.Chat.ID, "Structure not found, maybe try the superior /medchem command")
 		bot.Send(msg)
 		return
 	}
 	imgURL := fmt.Sprintf("https://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?t=l&cid=%s", cid)
 	photo := tgbotapi.NewPhoto(update.Message.Chat.ID, tgbotapi.FileURL(imgURL))
-	photo.Caption = args
+	photo.Caption = args + "\nIf you want more info, try the /medchem command instead"
 	bot.Send(photo)
 }
 