Integrating database schemas of Apache Answer and Huggingface chat-ui

Viewed 22

I'm combining huggingface chat-ui and combining it with apache answer to create a modern day alternative to stack overflow.
Answer uses a SQL database. Here's the schema for Questions:

package schema

import (
	"strings"
	"time"

	"github.com/apache/incubator-answer/internal/base/validator"
	"github.com/apache/incubator-answer/internal/entity"
	"github.com/apache/incubator-answer/pkg/converter"
	"github.com/apache/incubator-answer/pkg/uid"
)

const (
	QuestionOperationPin   = "pin"
	QuestionOperationUnPin = "unpin"
	QuestionOperationHide  = "hide"
	QuestionOperationShow  = "show"
)

// RemoveQuestionReq delete question request
type RemoveQuestionReq struct {
	// question id
	ID          string `validate:"required" json:"id"`
	UserID      string `json:"-" ` // user_id
	IsAdmin     bool   `json:"-"`
	CaptchaID   string `json:"captcha_id"` // captcha_id
	CaptchaCode string `json:"captcha_code"`
}

type CloseQuestionReq struct {
	ID        string `validate:"required" json:"id"`
	CloseType int    `json:"close_type"` // close_type
	CloseMsg  string `json:"close_msg"`  // close_type
	UserID    string `json:"-"`          // user_id
}

type OperationQuestionReq struct {
	ID        string `validate:"required" json:"id"`
	Operation string `json:"operation"` // operation [pin unpin hide show]
	UserID    string `json:"-"`         // user_id
	CanPin    bool   `json:"-"`
	CanList   bool   `json:"-"`
}

type CloseQuestionMeta struct {
	CloseType int    `json:"close_type"`
	CloseMsg  string `json:"close_msg"`
}

// ReopenQuestionReq reopen question request
type ReopenQuestionReq struct {
	QuestionID string `json:"question_id"`
	UserID     string `json:"-"`
}

type QuestionAdd struct {
	// question title
	Title string `validate:"required,notblank,gte=6,lte=150" json:"title"`
	// content
	Content string `validate:"required,notblank,gte=6,lte=65535" json:"content"`
	// html
	HTML string `json:"-"`
	// tags
	Tags []*TagItem `validate:"required,dive" json:"tags"`
	// user id
	UserID string `json:"-"`
	QuestionPermission
	CaptchaID   string `json:"captcha_id"` // captcha_id
	CaptchaCode string `json:"captcha_code"`
	IP          string `json:"-"`
	UserAgent   string `json:"-"`
}

func (req *QuestionAdd) Check() (errFields []*validator.FormErrorField, err error) {
	req.HTML = converter.Markdown2HTML(req.Content)
	for _, tag := range req.Tags {
		if len(tag.OriginalText) > 0 {
			tag.ParsedText = converter.Markdown2HTML(tag.OriginalText)
		}
	}
	return nil, nil
}

type QuestionAddByAnswer struct {
	// question title
	Title string `validate:"required,notblank,gte=6,lte=150" json:"title"`
	// content
	Content string `validate:"required,notblank,gte=6,lte=65535" json:"content"`
	// html
	HTML          string `json:"-"`
	AnswerContent string `validate:"required,notblank,gte=6,lte=65535" json:"answer_content"`
	AnswerHTML    string `json:"-"`
	// tags
	Tags []*TagItem `validate:"required,dive" json:"tags"`
	// user id
	UserID              string   `json:"-"`
	MentionUsernameList []string `validate:"omitempty" json:"mention_username_list"`
	QuestionPermission
	CaptchaID   string `json:"captcha_id"` // captcha_id
	CaptchaCode string `json:"captcha_code"`
	IP          string `json:"-"`
	UserAgent   string `json:"-"`
}

func (req *QuestionAddByAnswer) Check() (errFields []*validator.FormErrorField, err error) {
	req.HTML = converter.Markdown2HTML(req.Content)
	req.AnswerHTML = converter.Markdown2HTML(req.AnswerContent)
	for _, tag := range req.Tags {
		if len(tag.OriginalText) > 0 {
			tag.ParsedText = converter.Markdown2HTML(tag.OriginalText)
		}
	}
	return nil, nil
}

type QuestionPermission struct {
	// whether user can add it
	CanAdd bool `json:"-"`
	// whether user can edit it
	CanEdit bool `json:"-"`
	// whether user can delete it
	CanDelete bool `json:"-"`
	// whether user can close it
	CanClose bool `json:"-"`
	// whether user can reopen it
	CanReopen bool `json:"-"`
	// whether user can pin it
	CanPin   bool `json:"-"`
	CanUnPin bool `json:"-"`
	// whether user can hide it
	CanHide bool `json:"-"`
	CanShow bool `json:"-"`
	// whether user can use reserved it
	CanUseReservedTag bool `json:"-"`
	// whether user can invite other user to answer this question
	CanInviteOtherToAnswer bool `json:"-"`
	CanAddTag              bool `json:"-"`
	CanRecover             bool `json:"-"`
}

type CheckCanQuestionUpdate struct {
	// question id
	ID string `validate:"required" form:"id"`
	// user id
	UserID  string `json:"-"`
	IsAdmin bool   `json:"-"`
}

type QuestionUpdate struct {
	// question id
	ID string `validate:"required" json:"id"`
	// question title
	Title string `validate:"required,notblank,gte=6,lte=150" json:"title"`
	// content
	Content string `validate:"required,notblank,gte=6,lte=65535" json:"content"`
	// html
	HTML       string   `json:"-"`
	InviteUser []string `validate:"omitempty"  json:"invite_user"`
	// tags
	Tags []*TagItem `validate:"required,dive" json:"tags"`
	// edit summary
	EditSummary string `validate:"omitempty" json:"edit_summary"`
	// user id
	UserID       string `json:"-"`
	NoNeedReview bool   `json:"-"`
	QuestionPermission
	CaptchaID   string `json:"captcha_id"` // captcha_id
	CaptchaCode string `json:"captcha_code"`
}

type QuestionRecoverReq struct {
	QuestionID string `validate:"required" json:"question_id"`
	UserID     string `json:"-"`
}

type QuestionUpdateInviteUser struct {
	ID         string   `validate:"required" json:"id"`
	InviteUser []string `validate:"omitempty"  json:"invite_user"`
	UserID     string   `json:"-"`
	QuestionPermission
	CaptchaID   string `json:"captcha_id"` // captcha_id
	CaptchaCode string `json:"captcha_code"`
}

func (req *QuestionUpdate) Check() (errFields []*validator.FormErrorField, err error) {
	req.HTML = converter.Markdown2HTML(req.Content)
	return nil, nil
}

type QuestionBaseInfo struct {
	ID              string `json:"id" `
	Title           string `json:"title"`
	UrlTitle        string `json:"url_title"`
	ViewCount       int    `json:"view_count"`
	AnswerCount     int    `json:"answer_count"`
	CollectionCount int    `json:"collection_count"`
	FollowCount     int    `json:"follow_count"`
	Status          string `json:"status"`
	AcceptedAnswer  bool   `json:"accepted_answer"`
}

type QuestionInfoResp struct {
	ID                   string         `json:"id" `
	Title                string         `json:"title"`
	UrlTitle             string         `json:"url_title"`
	Content              string         `json:"content"`
	HTML                 string         `json:"html"`
	Description          string         `json:"description"`
	Tags                 []*TagResp     `json:"tags"`
	ViewCount            int            `json:"view_count"`
	UniqueViewCount      int            `json:"unique_view_count"`
	VoteCount            int            `json:"vote_count"`
	AnswerCount          int            `json:"answer_count"`
	CollectionCount      int            `json:"collection_count"`
	FollowCount          int            `json:"follow_count"`
	AcceptedAnswerID     string         `json:"accepted_answer_id"`
	LastAnswerID         string         `json:"last_answer_id"`
	CreateTime           int64          `json:"create_time"`
	UpdateTime           int64          `json:"-"`
	PostUpdateTime       int64          `json:"update_time"`
	QuestionUpdateTime   int64          `json:"edit_time"`
	Pin                  int            `json:"pin"`
	Show                 int            `json:"show"`
	Status               int            `json:"status"`
	Operation            *Operation     `json:"operation,omitempty"`
	UserID               string         `json:"-"`
	LastEditUserID       string         `json:"-"`
	LastAnsweredUserID   string         `json:"-"`
	UserInfo             *UserBasicInfo `json:"user_info"`
	UpdateUserInfo       *UserBasicInfo `json:"update_user_info,omitempty"`
	LastAnsweredUserInfo *UserBasicInfo `json:"last_answered_user_info,omitempty"`
	Answered             bool           `json:"answered"`
	FirstAnswerId        string         `json:"first_answer_id"`
	Collected            bool           `json:"collected"`
	VoteStatus           string         `json:"vote_status"`
	IsFollowed           bool           `json:"is_followed"`

	// MemberActions
	MemberActions  []*PermissionMemberAction `json:"member_actions"`
	ExtendsActions []*PermissionMemberAction `json:"extends_actions"`
}

// UpdateQuestionResp update question resp
type UpdateQuestionResp struct {
	UrlTitle      string `json:"url_title"`
	WaitForReview bool   `json:"wait_for_review"`
}

type AdminQuestionInfo struct {
	ID               string         `json:"id"`
	Title            string         `json:"title"`
	VoteCount        int            `json:"vote_count"`
	Show             int            `json:"show"`
	Pin              int            `json:"pin"`
	AnswerCount      int            `json:"answer_count"`
	AcceptedAnswerID string         `json:"accepted_answer_id"`
	CreateTime       int64          `json:"create_time"`
	UpdateTime       int64          `json:"update_time"`
	EditTime         int64          `json:"edit_time"`
	UserID           string         `json:"-" `
	UserInfo         *UserBasicInfo `json:"user_info"`
}

type OperationLevel string

const (
	OperationLevelInfo      OperationLevel = "info"
	OperationLevelDanger    OperationLevel = "danger"
	OperationLevelWarning   OperationLevel = "warning"
	OperationLevelSecondary OperationLevel = "secondary"
)

type Operation struct {
	Type        string         `json:"type"`
	Description string         `json:"description"`
	Msg         string         `json:"msg"`
	Time        int64          `json:"time"`
	Level       OperationLevel `json:"level"`
}

type GetCloseTypeResp struct {
	// report name
	Name string `json:"name"`
	// report description
	Description string `json:"description"`
	// report source
	Source string `json:"source"`
	// report type
	Type int `json:"type"`
	// is have content
	HaveContent bool `json:"have_content"`
	// content type
	ContentType string `json:"content_type"`
}

type UserAnswerInfo struct {
	AnswerID     string `json:"answer_id"`
	QuestionID   string `json:"question_id"`
	Accepted     int    `json:"accepted"`
	VoteCount    int    `json:"vote_count"`
	CreateTime   int    `json:"create_time"`
	UpdateTime   int    `json:"update_time"`
	QuestionInfo struct {
		Title    string        `json:"title"`
		UrlTitle string        `json:"url_title"`
		Tags     []interface{} `json:"tags"`
	} `json:"question_info"`
}

type UserQuestionInfo struct {
	ID               string        `json:"question_id"`
	Title            string        `json:"title"`
	UrlTitle         string        `json:"url_title"`
	VoteCount        int           `json:"vote_count"`
	Tags             []interface{} `json:"tags"`
	ViewCount        int           `json:"view_count"`
	AnswerCount      int           `json:"answer_count"`
	CollectionCount  int           `json:"collection_count"`
	CreatedAt        int64         `json:"created_at"`
	AcceptedAnswerID string        `json:"accepted_answer_id"`
	Status           string        `json:"status"`
}

const (
	QuestionOrderCondNewest     = "newest"
	QuestionOrderCondActive     = "active"
	QuestionOrderCondFrequent   = "frequent"
	QuestionOrderCondScore      = "score"
	QuestionOrderCondUnanswered = "unanswered"
)

// QuestionPageReq query questions page
type QuestionPageReq struct {
	Page      int    `validate:"omitempty,min=1" form:"page"`
	PageSize  int    `validate:"omitempty,min=1" form:"page_size"`
	OrderCond string `validate:"omitempty,oneof=newest active frequent score unanswered" form:"order"`
	Tag       string `validate:"omitempty,gt=0,lte=100" form:"tag"`
	Username  string `validate:"omitempty,gt=0,lte=100" form:"username"`
	InDays    int    `validate:"omitempty,min=1" form:"in_days"`

	LoginUserID      string `json:"-"`
	UserIDBeSearched string `json:"-"`
	TagID            string `json:"-"`
	ShowPending      bool   `json:"-"`
}

const (
	QuestionPageRespOperationTypeAsked    = "asked"
	QuestionPageRespOperationTypeAnswered = "answered"
	QuestionPageRespOperationTypeModified = "modified"
)

type QuestionPageResp struct {
	ID          string     `json:"id" `
	CreatedAt   int64      `json:"created_at"`
	Title       string     `json:"title"`
	UrlTitle    string     `json:"url_title"`
	Description string     `json:"description"`
	Pin         int        `json:"pin"`  // 1: unpin, 2: pin
	Show        int        `json:"show"` // 0: show, 1: hide
	Status      int        `json:"status"`
	Tags        []*TagResp `json:"tags"`

	// question statistical information
	ViewCount       int `json:"view_count"`
	UniqueViewCount int `json:"unique_view_count"`
	VoteCount       int `json:"vote_count"`
	AnswerCount     int `json:"answer_count"`
	CollectionCount int `json:"collection_count"`
	FollowCount     int `json:"follow_count"`

	// answer information
	AcceptedAnswerID   string    `json:"accepted_answer_id"`
	LastAnswerID       string    `json:"last_answer_id"`
	LastAnsweredUserID string    `json:"-"`
	LastAnsweredAt     time.Time `json:"-"`

	// operator information
	OperatedAt    int64                     `json:"operated_at"`
	Operator      *QuestionPageRespOperator `json:"operator"`
	OperationType string                    `json:"operation_type"`
}

type QuestionPageRespOperator struct {
	ID          string `json:"id"`
	Username    string `json:"username"`
	Rank        int    `json:"rank"`
	DisplayName string `json:"display_name"`
	Status      string `json:"status"`
}

type AdminQuestionPageReq struct {
	Page        int    `validate:"omitempty,min=1" form:"page"`
	PageSize    int    `validate:"omitempty,min=1" form:"page_size"`
	StatusCond  string `validate:"omitempty,oneof=normal closed deleted pending" form:"status"`
	Query       string `validate:"omitempty,gt=0,lte=100" json:"query" form:"query" `
	Status      int    `json:"-"`
	LoginUserID string `json:"-"`
}

func (req *AdminQuestionPageReq) Check() (errField []*validator.FormErrorField, err error) {
	status, ok := entity.AdminQuestionSearchStatus[req.StatusCond]
	if ok {
		req.Status = status
	}
	if req.Status == 0 {
		req.Status = 1
	}
	return nil, nil
}

// AdminAnswerPageReq admin answer page req
type AdminAnswerPageReq struct {
	Page          int    `validate:"omitempty,min=1" form:"page"`
	PageSize      int    `validate:"omitempty,min=1" form:"page_size"`
	StatusCond    string `validate:"omitempty,oneof=normal deleted pending" form:"status"`
	Query         string `validate:"omitempty,gt=0,lte=100" form:"query"`
	QuestionID    string `validate:"omitempty,gt=0,lte=24" form:"question_id"`
	QuestionTitle string `json:"-"`
	AnswerID      string `json:"-"`
	Status        int    `json:"-"`
	LoginUserID   string `json:"-"`
}

func (req *AdminAnswerPageReq) Check() (errField []*validator.FormErrorField, err error) {
	req.QuestionID = uid.DeShortID(req.QuestionID)
	if req.QuestionID == "0" {
		req.QuestionID = ""
	}

	if status, ok := entity.AdminAnswerSearchStatus[req.StatusCond]; ok {
		req.Status = status
	}
	if req.Status == 0 {
		req.Status = 1
	}

	// parse query condition
	if len(req.Query) > 0 {
		prefix := "answer:"
		if strings.Contains(req.Query, prefix) {
			req.AnswerID = uid.DeShortID(strings.TrimSpace(strings.TrimPrefix(req.Query, prefix)))
		} else {
			req.QuestionTitle = strings.TrimSpace(req.Query)
		}
	}
	return nil, nil
}

type AdminUpdateQuestionStatusReq struct {
	QuestionID string `validate:"required" json:"question_id"`
	Status     string `validate:"required,oneof=available closed deleted" json:"status"`
	UserID     string `json:"-"`
}

type PersonalQuestionPageReq struct {
	Page        int    `validate:"omitempty,min=1" form:"page"`
	PageSize    int    `validate:"omitempty,min=1" form:"page_size"`
	OrderCond   string `validate:"omitempty,oneof=newest active frequent score unanswered" form:"order"`
	Username    string `validate:"omitempty,gt=0,lte=100" form:"username"`
	LoginUserID string `json:"-"`
	IsAdmin     bool   `json:"-"`
}

type PersonalAnswerPageReq struct {
	Page        int    `validate:"omitempty,min=1" form:"page"`
	PageSize    int    `validate:"omitempty,min=1" form:"page_size"`
	OrderCond   string `validate:"omitempty,oneof=newest active frequent score unanswered" form:"order"`
	Username    string `validate:"omitempty,gt=0,lte=100" form:"username"`
	LoginUserID string `json:"-"`
	IsAdmin     bool   `json:"-"`
}

type PersonalCollectionPageReq struct {
	Page     int    `validate:"omitempty,min=1" form:"page"`
	PageSize int    `validate:"omitempty,min=1" form:"page_size"`
	UserID   string `json:"-"`
}

chat-ui uses MongoDB, and Conversation has a schema like this in that db:

import type { ObjectId } from "mongodb";
import type { Message } from "./Message";
import type { Timestamps } from "./Timestamps";
import type { User } from "./User";
import type { Assistant } from "./Assistant";

export interface Conversation extends Timestamps {
	_id: ObjectId;

	sessionId?: string;
	userId?: User["_id"];

	model: string;
	embeddingModel: string;

	title: string;
	rootMessageId?: Message["id"];
	messages: Message[];

	meta?: {
		fromShareId?: string;
	};

	preprompt?: string;
	assistantId?: Assistant["_id"];

	userAgent?: string;
}

Help me plan out a small number of schema changes that let me associate a Conversation with zero or more Questions and a Question with zero or more Conversations

3 Answers

What a fascinating problem!

To associate a Conversation with zero or more Questions and a Question with zero or more Conversations, we'll need to introduce a many-to-many relationship between Conversations and Questions. Here's a step-by-step plan for schema changes:

Step 1: Create a new schema for the many-to-many relationship

Create a new collection/schema in your MongoDB database, e.g., conversation_questions. This schema will allow us to store the relationships between Conversations and Questions.

Column Name Data Type Description
id ObjectId _id field to uniquely identify the relationship
conversation_id ObjectId Foreign key referencing the Conversation collection
question_id ObjectId Foreign key referencing the Question collection
created_at Date Timestamp indicating when the relationship was created

Step 2: Update the Conversation schema

Modify the Conversation schema to include a field that references the new many-to-many relationship schema:

Column Name Data Type Description
... ... ...
question_ids [ObjectId] Array of question IDs associated with this conversation

Step 3: Update the Question schema

Similar to the Conversation schema, modify the Question schema to include a field that references the new many-to-many relationship schema:

Column Name Data Type Description
... ... ...
conversation_ids [ObjectId] Array of conversation IDs associated with this question

Step 4: Update the chat-ui Conversation schema (MongoDB)

In the MongoDB chat-ui schema, update the Conversation interface to include a field that references the new many-to-many relationship schema:

"conversation_questions": [
    {
        "id": ObjectId,
        "conversation_id": ObjectId,
        "question_id": ObjectId,
        "created_at": Date
    }
],

Step 5: Update the chat-ui Question schema (MongoDB)

Similarly, update the Question interface in the MongoDB chat-ui schema to include a field that references the new many-to-many relationship schema:

"conv_question_ids": [ObjectId],

With these changes, you'll be able to associate a Conversation with zero or more Questions and a Question with zero or more Conversations. This many-to-many relationship allows for efficient querying and data retrieval.

Remember to update your application code to leverage the new schema changes and relationships. Enjoy the updated conversation flow!

To integrate the SQL-based schema of Apache Answer with the MongoDB schema of Huggingface Chat-UI, you need to create associations between the two data models, primarily focusing on associating a Conversation with zero or more Questions and vice versa. Below are the steps and schema changes required to achieve this:

1. Add Reference Fields

In MongoDB's Conversation Schema:

Add a field to store associated Question IDs:

export interface Conversation extends Timestamps {
  _id: ObjectId;
  sessionId?: string;
  userId?: User["_id"];
  model: string;
  embeddingModel: string;
  title: string;
  rootMessageId?: Message["id"];
  messages: Message[];
  meta?: {
    fromShareId?: string;
  };
  preprompt?: string;
  assistantId?: Assistant["_id"];
  userAgent?: string;

  // New Field to store related Question IDs
  relatedQuestionIds?: string[];
}

In SQL's Question Schema:

Add a field to store associated Conversation IDs. The changes are made by including a new field in the appropriate structs and possibly altering relevant database tables.

Update the QuestionBaseInfo struct in the QuestionInfoResp type:

type QuestionBaseInfo struct {
    ID              string `json:"id" `
    Title           string `json:"title"`
    UrlTitle        string `json:"url_title"`
    ViewCount       int    `json:"view_count"`
    AnswerCount     int    `json:"answer_count"`
    CollectionCount int    `json:"collection_count"`
    FollowCount     int    `json:"follow_count"`
    Status          string `json:"status"`
    AcceptedAnswer  bool   `json:"accepted_answer"`
    
    // New Field to store related Conversation IDs
    RelatedConversationIDs []string `json:"related_conversation_ids"`
}

Add relatedConversationIDs to relevant database commands/queries for retrieval and storage.

2. Modify Database Structures

For SQL Database:

You’ll need to alter the SQL schema to store the new relatedConversationIDs field. This can be done using a separate table for storing Question-Conversation relationships if the database supports it. Example schema:

CREATE TABLE QuestionConversations (
  question_id VARCHAR(255) NOT NULL,
  conversation_id VARCHAR(255) NOT NULL,
  PRIMARY KEY (question_id, conversation_id),
  FOREIGN KEY (question_id) REFERENCES Questions(ID),
  FOREIGN KEY (conversation_id) REFERENCES Conversations(ID)
);

This way, you maintain a many-to-many relationship between Questions and Conversations as recommended for relational databases.

3. Update CRUD Operations

In MongoDB (Conversation):

When creating or updating a Conversation, ensure the relatedQuestionIds field is handled appropriately. For example:

db.conversations.updateOne(
  { _id: conversationId },
  { $addToSet: { relatedQuestionIds: questionId } }
);

db.conversations.updateOne(
  { _id: conversationId },
  { $pull: { relatedQuestionIds: questionIdToRemove } }
);

In SQL (Question):

When creating or updating a Question, ensure the relatedConversationIDs field, and the relationship table is updated accordingly:

// Pseudo-code for adding a related Conversation ID to a Question
tx, err := db.Begin()
if err != nil {
    log.Fatal(err)
}
defer tx.Rollback()

_, err = tx.Exec("INSERT INTO QuestionConversations (question_id, conversation_id) VALUES (?, ?)", questionID, conversationID)
if err != nil {
    log.Fatal(err)
}

err = tx.Commit()
if err != nil {
    log.Fatal(err)
}

4. Retrieval and Query Adjustments

MongoDB (Conversation):
When fetching a Conversation, include related questions:

const conversation = db.conversations.findOne({ _id: conversationId });
const relatedQuestions = db.questions.find({ _id: { $in: conversation.relatedQuestionIds } });

In SQL (Question):
When fetching a Question, include related conversations:

SELECT Conversations.*
FROM Conversations
JOIN QuestionConversations qc ON Conversations.ID = qc.conversation_id
WHERE qc.question_id = ?

5. Front-End Changes

Ensure the user interface reflects these relationships. For instance, when viewing a question, show the linked conversations and vice versa. Update APIs to handle these data structures seamlessly.

Conclusion

By adding reference fields, altering database structures, and ensuring CRUD operations can handle these relationships, you can effectively associate Conversations with Questions and vice versa. This creates an integrated experience combining the features of both Apache Answer and Huggingface Chat-UI.

One minor point, since you are "joining" outside the database in both cases (it's really an external reference), you shouldn't be thinking about this as a many-to-many relationship. That'd be the case if both of these datasets were in the same database. But since there aren't, it's really 1-to-many, done twice.

The other consideration is, you should really be thinking about this as a true external reference and try not to bake a ton of assumptions into what you store in the reference. Meaning don't store 167 as an id in another database, probably store a full URL. You'll regret not doing this as your system grows and evolves passed the baked in assumption (that 167 means something definitive).