Medchem command improvements

This commit is contained in:
2025-06-25 12:00:29 +02:00
parent a0f0918504
commit 335912ea8a
2 changed files with 136 additions and 5 deletions

View File

@ -70,9 +70,19 @@ type PubChemSearchResponse struct {
} `json:"IdentifierList"`
}
// PubChemSynonymsResponse is the decoding target for the JSON body returned by
// the PubChem PUG REST ".../compound/cid/<cid>/synonyms/JSON" endpoint, as
// requested by fetchCommonNames.
type PubChemSynonymsResponse struct {
// InformationList is the top-level wrapper object of the response.
InformationList struct {
// Information holds the per-compound records; fetchCommonNames reads only
// the first element.
Information []struct {
// CID is the compound identifier the record belongs to.
CID int `json:"CID"`
// Synonym is the list of names reported for the compound.
Synonym []string `json:"Synonym"`
} `json:"Information"`
} `json:"InformationList"`
}
type CompoundData struct {
CID int
Name string
CommonNames []string // Top 3 most common names
IUPACName string
MolecularFormula string
MolecularWeight float64
@ -156,6 +166,18 @@ func fetchCompoundData(query string) (*CompoundData, error) {
}
compound := parsePubChemData(&pubchemResp.PCCompounds[0], query)
// Fetch common names/synonyms
commonNames, err := fetchCommonNames(compound.CID)
if err == nil && len(commonNames) > 0 {
compound.CommonNames = commonNames
// Use most common name for display
compound.Name = fmt.Sprintf("%s (CID %d)", commonNames[0], compound.CID)
} else {
// Fallback to original query
compound.Name = fmt.Sprintf("%s (CID %d)", query, compound.CID)
}
return compound, nil
}
@ -192,6 +214,84 @@ func searchCompoundCID(query string) (int, error) {
return searchResp.IdentifierList.CID[0], nil
}
// fetchCommonNames retrieves the synonym list for the PubChem compound cid
// from the PUG REST API and reduces it with filterCommonNames.
//
// It returns an error when the request fails, when the server answers with
// anything other than 200 OK, when the body cannot be read or decoded, or
// when the response contains no Information entries. Only the first
// Information entry is used.
//
// NOTE(review): http.Get goes through http.DefaultClient, which has no
// timeout, so a stalled PubChem connection blocks the caller indefinitely.
// Consider switching to a shared client with a Timeout.
func fetchCommonNames(cid int) ([]string, error) {
	url := fmt.Sprintf("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/%d/synonyms/JSON", cid)

	resp, err := http.Get(url)
	if err != nil {
		return nil, fmt.Errorf("requesting synonyms for CID %d: %w", cid, err)
	}
	defer resp.Body.Close()

	// Report the real status instead of a blanket "not found": rate limiting
	// and server errors also end up here and need to be told apart.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("requesting synonyms for CID %d: unexpected status %s", cid, resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading synonyms for CID %d: %w", cid, err)
	}

	var synonymsResp PubChemSynonymsResponse
	if err := json.Unmarshal(body, &synonymsResp); err != nil {
		return nil, fmt.Errorf("decoding synonyms for CID %d: %w", cid, err)
	}

	records := synonymsResp.InformationList.Information
	if len(records) == 0 {
		return nil, fmt.Errorf("no synonyms found for CID %d", cid)
	}

	return filterCommonNames(records[0].Synonym), nil
}
// filterCommonNames selects up to three human-friendly names from synonyms.
//
// Synonyms are examined in the order given. An entry is skipped when it is an
// exact duplicate of an already accepted entry, is shorter than 3 or longer
// than 40 bytes, or contains (case-insensitively) one of the technical
// markers listed below. Scanning stops as soon as three entries have been
// accepted.
//
// Accepted entries are split into "simple" names (at most 30 bytes and free
// of '[' and '(') and all others. The result lists the simple names first,
// followed by the others, each group in its original input order, so that
// element 0 is the earliest simple name in the input. A nil slice is returned
// when nothing qualifies.
//
// The marker check is a plain substring match, so legitimate names that
// happen to contain a marker (for example "zinc oxide") are skipped too.
func filterCommonNames(synonyms []string) []string {
	const (
		maxNames     = 3  // number of names returned at most
		minLen       = 3  // shorter entries are skipped
		maxLen       = 40 // longer entries are skipped
		maxSimpleLen = 30 // upper bound for the preferred "simple" group
	)

	// Substrings that mark a synonym as a database identifier or another
	// technical label rather than a name.
	technicalMarkers := [...]string{
		"iupac", "cas", "einecs", "unii", "dtxsid", "pubchem",
		"chembl", "zinc", "inchi", "smiles", "registry",
	}
	isTechnical := func(name string) bool {
		lower := strings.ToLower(name)
		for _, marker := range technicalMarkers {
			if strings.Contains(lower, marker) {
				return true
			}
		}
		return false
	}

	var simple, others []string
	seen := make(map[string]bool)

	for _, synonym := range synonyms {
		if seen[synonym] || len(synonym) < minLen || len(synonym) > maxLen {
			continue
		}
		if isTechnical(synonym) {
			continue
		}

		// Keep the two groups separate and append within each one, so the
		// input order is preserved. Prepending simple names would reverse
		// them and put the last accepted name at index 0.
		if len(synonym) <= maxSimpleLen && !strings.ContainsAny(synonym, "[(") {
			simple = append(simple, synonym)
		} else {
			others = append(others, synonym)
		}
		seen[synonym] = true

		if len(simple)+len(others) >= maxNames {
			break
		}
	}

	return append(simple, others...)
}
func parsePubChemData(compound *PubChemCompound, originalName string) *CompoundData {
data := &CompoundData{
CID: compound.ID.ID.CID,
@ -283,9 +383,22 @@ func formatBasicInfo(c *CompoundData) string {
fmt.Fprintf(b, "🧪 *%s*\n", c.Name)
fmt.Fprintf(b, "📋 *Basic Properties*\n\n")
if c.IUPACName != "" {
fmt.Fprintf(b, "*IUPAC Name:* %s\n", c.IUPACName)
// Show common names first
if len(c.CommonNames) > 0 {
fmt.Fprintf(b, "*Common Names:*\n")
for i, name := range c.CommonNames {
if i >= 3 { // Limit to 3
break
}
fmt.Fprintf(b, "• %s\n", name)
}
fmt.Fprintf(b, "\n")
}
if c.IUPACName != "" {
fmt.Fprintf(b, "*IUPAC Name:* %s\n\n", c.IUPACName)
}
if c.MolecularFormula != "" {
fmt.Fprintf(b, "*Formula:* `%s`\n", c.MolecularFormula)
}
@ -600,7 +713,25 @@ func fetchCompoundDataByCID(cid int) (*CompoundData, error) {
return nil, fmt.Errorf("no compound data found")
}
compound := parsePubChemData(&pubchemResp.PCCompounds[0], fmt.Sprintf("CID %d", cid))
compound := parsePubChemData(&pubchemResp.PCCompounds[0], "")
// Fetch common names/synonyms
commonNames, err := fetchCommonNames(cid)
if err == nil && len(commonNames) > 0 {
compound.CommonNames = commonNames
// Use most common name for display
compound.Name = fmt.Sprintf("%s (CID %d)", commonNames[0], cid)
} else if compound.IUPACName != "" {
// Fallback to IUPAC name, truncate if too long
name := compound.IUPACName
if len(name) > 50 {
name = name[:47] + "..."
}
compound.Name = fmt.Sprintf("%s (CID %d)", name, cid)
} else {
compound.Name = fmt.Sprintf("CID %d", cid)
}
return compound, nil
}

View File

@ -29,13 +29,13 @@ func (m MolCommand) Execute(update tgbotapi.Update, bot *tgbotapi.BotAPI) {
}
cid, err := fetchPubchemCID(args)
if err != nil {
msg := tgbotapi.NewMessage(update.Message.Chat.ID, "Structure not found")
msg := tgbotapi.NewMessage(update.Message.Chat.ID, "Structure not found, maybe try the superior /medchem command")
bot.Send(msg)
return
}
imgURL := fmt.Sprintf("https://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?t=l&cid=%s", cid)
photo := tgbotapi.NewPhoto(update.Message.Chat.ID, tgbotapi.FileURL(imgURL))
photo.Caption = args
photo.Caption = args + "\nIf you want more info, try the /medchem command instead"
bot.Send(photo)
}