import ast
import json
import os
from contextlib import asynccontextmanager
from typing import List

from databases import Database
@@ -43,8 +44,8 @@ class DataModel(BaseModel):
4344 user : UserCategory
4445 post : PostCategory
4546
46-
# Connection string for the application database.  Set the DATABASE_URL
# environment variable to override it per deployment, e.g.
# "postgresql://localhost:5432/maru" for a local database.
# NOTE(review): the fallback below embeds a database username and password in
# source control.  It is kept only so that environments without DATABASE_URL
# behave exactly as before; rotate the password and remove the fallback once
# the variable is provisioned everywhere.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://cheesecrust:0810jack@mydatabase.c3kmc4wcyz81.ap-northeast-2.rds.amazonaws.com/maru",
)
database = Database(DATABASE_URL)
4950
5051
@@ -53,7 +54,6 @@ async def lifespan(app: FastAPI):
5354 await database .connect ()
5455 yield
5556 await database .disconnect ()
56- # recommendation_database.close()
5757
5858
5959app = FastAPI (lifespan = lifespan )
@@ -72,30 +72,46 @@ async def lifespan(app: FastAPI):
7272 allow_headers = ["*" ],
7373)
7474
def extract_features(data):
    """Flatten one feature-card payload into a single list of feature values.

    The payload is expected to hold an ``options`` entry (a JSON-encoded
    array, or an already-decoded list) and the scalar entries ``smoking``,
    ``mate_age`` and ``room_sharing_option``.  The scalar values are appended,
    in that order, after the decoded options.

    Args:
        data: JSON text (``str``/``bytes``), an already-decoded mapping, or
            ``None`` when the card has no features.

    Returns:
        A new list: the decoded options followed by the three scalar values.
        A scalar that is missing or JSON ``null`` is appended as ``None``.
        ``[]`` is returned when ``data`` is ``None`` or decodes to JSON ``null``.

    Raises:
        json.JSONDecodeError: if ``data`` or its ``options`` text is not valid JSON.
        TypeError: if ``options`` does not decode to a list.
    """
    if data is None:
        return []

    # The column may arrive as JSON text or already decoded; only parse text.
    if isinstance(data, (str, bytes, bytearray)):
        parsed_data = json.loads(data)
    else:
        parsed_data = data

    # A JSON ``null`` document carries no features at all.
    if parsed_data is None:
        return []

    # "options" is normally a JSON array serialized as a string inside the
    # outer document, so it needs a second decode.
    options = parsed_data.get("options")
    if isinstance(options, (str, bytes, bytearray)):
        options = json.loads(options)

    if options is None:
        options = []
    elif not isinstance(options, list):
        raise TypeError(
            f"'options' must decode to a list, got {type(options).__name__}"
        )

    # Copy so that an already-decoded payload passed by the caller is not mutated.
    features = list(options)

    # Missing keys yield None instead of raising, so one incomplete card does
    # not abort processing of every other card.
    scalar_keys = ("smoking", "mate_age", "room_sharing_option")
    features.extend(parsed_data.get(key) for key in scalar_keys)
    return features
7594
7695def generate_df_data (data ):
7796 df = pd .DataFrame (data )
97+
7898 if "features" in df .columns :
7999
80- # 데이터 정제 (null을 None으로 변환)
81- df ["features" ] = df ["features" ].str .replace ('null' , 'None' )
82-
83- # 데이터 정제 (숫자를 문자열로 변환)
84- df ["features" ] = df ["features" ].apply (lambda x : str (x ))
85-
86- df ["features" ] = df ["features" ].apply (ast .literal_eval )
100+ df ["features" ] = df ["features" ].apply (extract_features )
87101
102+
88103 features = (
89104 df ["features" ]
90105 .apply (pd .Series )
91106 .stack ()
92107 .reset_index (level = 1 , drop = True )
93108 .to_frame ("features" )
94109 )
95-
110+
96111 dummies = (
97112 pd .get_dummies (features , prefix = "" , prefix_sep = "" ).groupby (level = 0 ).sum ()
98113 )
114+
99115 if "[]" in dummies :
100116 dummies .drop ("[]" , axis = 1 , inplace = True )
101117 if "null" in dummies :
@@ -108,7 +124,6 @@ def generate_df_data(data):
108124
def convert_fit_data(df, columns=None):
    """Return ``df`` without the columns that should not be fed to the model.

    Args:
        df: DataFrame of card rows.
        columns: Column labels to drop.  When omitted, ``["id", "gender",
            "card_type"]`` is used.

    Returns:
        A new DataFrame with the requested columns removed; ``df`` itself is
        left untouched.

    Raises:
        KeyError: if any requested column is not present in ``df``.
    """
    # Build the default per call instead of sharing one mutable list object
    # across every invocation.
    if columns is None:
        columns = ["id", "gender", "card_type"]
    # ``columns=`` already selects the column axis, so no ``axis`` is needed.
    return df.drop(columns=columns)
113128
114129
@@ -120,21 +135,20 @@ def feature_card_cosine_similarity(card1, card2):
120135
121136
122137async def fetch_data ():
123- print ("fetch_data" )
124138
125139 query = """
126- SELECT member_id AS id, member_features AS features, gender, 'my' AS card_type, birth_year
140+ SELECT member_id AS id, member_features::jsonb AS features, gender, 'my' AS card_type, birth_year
127141 FROM member_account
128142 JOIN feature_card ON member_account.my_card_id = feature_card.feature_card_id
129143 UNION ALL
130- SELECT member_id as id, member_features AS features, gender, 'mate' AS card_type, birth_year
144+ SELECT member_id as id, member_features::jsonb AS features, gender, 'mate' AS card_type, birth_year
131145 FROM member_account
132146 JOIN feature_card ON member_account.mate_card_id = feature_card.feature_card_id
133147 """
134148 user_cards = [dict (record ) for record in await database .fetch_all (query )]
135149
136150 query = """
137- SELECT id, member_features AS features, gender, 'room' AS card_type, member_account.birth_year
151+ SELECT id, member_features::jsonb AS features, gender, 'room' AS card_type, member_account.birth_year
138152 FROM shared_room_post
139153 JOIN feature_card ON shared_room_post.room_mate_card_id = feature_card.feature_card_id
140154 JOIN member_account ON member_account.member_id = shared_room_post.publisher_id
@@ -164,43 +178,41 @@ async def fetch_data():
164178 return user_male_cards , user_female_cards , post_male_cards , post_female_cards
165179
def fill_missing_values(df):
    """Impute missing entries of ``df`` with a mean-strategy ``SimpleImputer``.

    Prints a progress marker, fits a fresh imputer on ``df`` and returns the
    result of ``fit_transform`` unchanged.
    NOTE(review): ``SimpleImputer`` is presumably scikit-learn's, in which case
    the result is an array rather than a DataFrame - confirm before chaining
    DataFrame methods on it.
    """
    print("fill_missing_values")
    return SimpleImputer(strategy="mean").fit_transform(df)
169184
170185def clustering (user_male_cards , user_female_cards , post_male_cards , post_female_cards ):
171186
172187 male_cards = [* user_male_cards , * post_male_cards ]
173188 male_df = generate_df_data (male_cards )
189+ print (male_df )
174190
175191 female_cards = [* user_female_cards , * post_female_cards ]
176192 female_df = generate_df_data (female_cards )
177193
178- print ("complete generate_df_data" )
179-
194+
180195 # 여기의 fit 이 뭔데 값으로??
181196 # 결측값 우선 처리
182197
183198 male_cluster_model = DBSCAN (eps = 0.2 , min_samples = 2 )
184199 male_cluster_model .fit (
185- convert_fit_data (fill_missing_values ( male_df ) )
200+ convert_fit_data (male_df )
186201 )
187202
188- print ("male fit complete" )
189-
190203 female_cluster_model = DBSCAN (eps = 0.2 , min_samples = 2 )
191204 female_cluster_model .fit (
192- fill_missing_values ( convert_fit_data (female_df ) )
205+ convert_fit_data (female_df )
193206 )
194-
195- print ("complete clustering" )
196-
207+
197208 male_cluster = defaultdict (lambda : [])
198209
199210 find_male_user_cluster = defaultdict (lambda : {"my" : None , "mate" : None })
200211
201212 for index , cluster in enumerate (
202213 male_cluster_model .fit_predict (convert_fit_data (male_df ))
203214 ):
215+ print (cluster )
204216 card = male_cards [index ]
205217
206218 male_cluster [cluster ].append (male_cards [index ])
@@ -223,6 +235,7 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
223235 male_recommendation_result = defaultdict (
224236 lambda : {"user" : {"my" : [], "mate" : []}, "post" : {"my" : [], "mate" : []}}
225237 )
238+
226239 for cluster , cluster_item in male_cluster .items ():
227240 for i , card in enumerate (cluster_item ):
228241 if card ["card_type" ] == "room" :
@@ -233,8 +246,8 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
233246 for j , other_card in enumerate (cluster_item ):
234247 if (
235248 i == j
236- or card_type == other_card ["card_type" ]
237- or user_id == other_card ["id" ]
249+ # or card_type == other_card["card_type"]
250+ # or user_id == other_card["id"]
238251 ):
239252 continue
240253
@@ -269,8 +282,8 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
269282 for j , other_card in enumerate (cluster_item ):
270283 if (
271284 i == j
272- or card_type == other_card ["card_type" ]
273- or user_id == other_card ["id" ]
285+ # or card_type == other_card["card_type"]
286+ # or user_id == other_card["id"]
274287 ):
275288 continue
276289
@@ -292,13 +305,9 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
292305 }
293306 )
294307 print (male_recommendation_result )
295- # recommendation_collection = recommendation_database.collection("recommendation")
296- # for male_user_id, recommendation_result in male_recommendation_result.items():
297- # doc_ref = recommendation_collection.document(f"{male_user_id}")
298- # doc_ref.set(recommendation_result)
299- # for female_user_id, recommendation_result in female_recommendation_result.items():
300- # doc_ref = recommendation_collection.document(f"{female_user_id}")
301- # doc_ref.set(recommendation_result)
308+ print (female_recommendation_result )
309+
310+ # 여기에 insert
302311
303312
304313@app .get ("/" )
@@ -311,7 +320,31 @@ async def update():
311320 user_male_cards , user_female_cards , post_male_cards , post_female_cards = (
312321 await fetch_data ()
313322 )
314- print ( "fetch complete" )
323+
315324 clustering (user_male_cards , user_female_cards , post_male_cards , post_female_cards )
316325 print ("clustering complete" )
326+
317327 return {"detail" : "ok" }
328+
@app.get("/fetch")
async def fetch():
    """Print the female user cards and their feature frame for inspection.

    Loads every card group through ``fetch_data`` and prints the female user
    cards, first raw and then after ``generate_df_data``.  Nothing is returned
    explicitly, so the handler's result is ``None``.
    """
    # fetch_data yields four groups; only the second one is used here.
    _, user_female_cards, _, _ = await fetch_data()

    print(user_female_cards)
    print(generate_df_data(user_female_cards))
342+
@app.get("/insert")
async def insert():
    """Insert one hard-coded test row into the ``recommend`` table.

    NOTE(review): the row always uses ``id`` 2, so repeated calls will
    presumably conflict if ``id`` is unique - confirm against the schema.
    """
    statement = """
    insert into recommend (id, user_id, card_type, recommendation_id, recommendation_card_type, score)
    values (2,'test', 'test', 'test', 'tset', 100)
    """
    await database.execute(statement)
0 commit comments