Skip to content

Commit d0cce65

Browse files
committed
fix kookmin-sw#1 - fix data type
JSON 데이터 타입을 배열로 변환하여 처리한 후 DB에 저장
1 parent 92e4b70 commit d0cce65

File tree

7 files changed

+69
-318
lines changed

7 files changed

+69
-318
lines changed

alembic.ini

Lines changed: 0 additions & 116 deletions
This file was deleted.

main.py

Lines changed: 69 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import ast
2+
import json
23
from contextlib import asynccontextmanager
34
from typing import List
45
from databases import Database
@@ -43,8 +44,8 @@ class DataModel(BaseModel):
4344
user: UserCategory
4445
post: PostCategory
4546

46-
4747
import os

# SECURITY(review): the fallback literal below embeds live database
# credentials in source control. Rotate this password and supply the URL via
# the DATABASE_URL environment variable; the literal is retained only so that
# deployments which currently rely on it keep working unchanged.
# For local development: export DATABASE_URL="postgresql://localhost:5432/maru"
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://cheesecrust:0810jack@mydatabase.c3kmc4wcyz81.ap-northeast-2.rds.amazonaws.com/maru",
)
database = Database(DATABASE_URL)
4950

5051

@@ -53,7 +54,6 @@ async def lifespan(app: FastAPI):
5354
await database.connect()
5455
yield
5556
await database.disconnect()
56-
# recommendation_database.close()
5757

5858

5959
app = FastAPI(lifespan=lifespan)
@@ -72,30 +72,46 @@ async def lifespan(app: FastAPI):
7272
allow_headers=["*"],
7373
)
7474

75+
def extract_features(data):
    """Flatten one feature-card JSON document into a list of feature values.

    The result is the decoded ``"options"`` array followed by the values of
    the ``"smoking"``, ``"mate_age"`` and ``"room_sharing_option"`` keys, in
    that order.

    Args:
        data: The feature document. Either JSON text (``str``/``bytes``) or
            an already-decoded ``dict``. ``None`` and any value that does not
            decode to a JSON object (e.g. ``"null"`` or a pandas NaN) yield
            an empty list.

    Returns:
        A new list of feature values. Keys that are absent contribute
        ``None``, exactly like an explicit JSON ``null`` would.

    Raises:
        json.JSONDecodeError: If ``data`` or its ``"options"`` value is text
            that is not valid JSON.
    """
    if data is None:
        return []

    # The driver may hand back raw JSON text or an already-decoded mapping,
    # depending on how the jsonb column is configured; accept both.
    if isinstance(data, (str, bytes, bytearray)):
        document = json.loads(data)
    else:
        document = data
    if not isinstance(document, dict):
        return []

    # "options" is stored as a JSON-encoded array inside the document, so it
    # usually needs a second decode; tolerate null and pre-decoded lists too.
    options = document.get("options")
    if isinstance(options, (str, bytes, bytearray)):
        options = json.loads(options)

    if options is None:
        features = []
    elif isinstance(options, list):
        features = list(options)  # copy so the caller's data is not mutated
    else:
        features = [options]

    # Append the scalar attributes after the options.
    for key in ("smoking", "mate_age", "room_sharing_option"):
        features.append(document.get(key))
    return features
7594

7695
def generate_df_data(data):
7796
df = pd.DataFrame(data)
97+
7898
if "features" in df.columns:
7999

80-
# 데이터 정제 (null을 None으로 변환)
81-
df["features"] = df["features"].str.replace('null', 'None')
82-
83-
# 데이터 정제 (숫자를 문자열로 변환)
84-
df["features"] = df["features"].apply(lambda x: str(x))
85-
86-
df["features"] = df["features"].apply(ast.literal_eval)
100+
df["features"] = df["features"].apply(extract_features)
87101

102+
88103
features = (
89104
df["features"]
90105
.apply(pd.Series)
91106
.stack()
92107
.reset_index(level=1, drop=True)
93108
.to_frame("features")
94109
)
95-
110+
96111
dummies = (
97112
pd.get_dummies(features, prefix="", prefix_sep="").groupby(level=0).sum()
98113
)
114+
99115
if "[]" in dummies:
100116
dummies.drop("[]", axis=1, inplace=True)
101117
if "null" in dummies:
@@ -108,7 +124,6 @@ def generate_df_data(data):
108124

109125
def convert_fit_data(df, columns=None):
    """Return ``df`` without the columns that must not be fed to the model.

    Args:
        df: The pandas DataFrame to prepare; it is not modified.
        columns: Column labels to remove. Defaults to
            ``["id", "gender", "card_type"]``. ``None`` is used as the
            default instead of a list literal so no mutable object is shared
            between calls.

    Returns:
        A new DataFrame containing every column of ``df`` except ``columns``.

    Raises:
        KeyError: If any requested column is not present in ``df``.
    """
    if columns is None:
        columns = ["id", "gender", "card_type"]
    return df.drop(columns=columns)
113128

114129

@@ -120,21 +135,20 @@ def feature_card_cosine_similarity(card1, card2):
120135

121136

122137
async def fetch_data():
123-
print("fetch_data")
124138

125139
query = """
126-
SELECT member_id AS id, member_features AS features, gender, 'my' AS card_type, birth_year
140+
SELECT member_id AS id, member_features::jsonb AS features, gender, 'my' AS card_type, birth_year
127141
FROM member_account
128142
JOIN feature_card ON member_account.my_card_id = feature_card.feature_card_id
129143
UNION ALL
130-
SELECT member_id as id, member_features AS features, gender, 'mate' AS card_type, birth_year
144+
SELECT member_id as id, member_features::jsonb AS features, gender, 'mate' AS card_type, birth_year
131145
FROM member_account
132146
JOIN feature_card ON member_account.mate_card_id = feature_card.feature_card_id
133147
"""
134148
user_cards = [dict(record) for record in await database.fetch_all(query)]
135149

136150
query = """
137-
SELECT id, member_features AS features, gender, 'room' AS card_type, member_account.birth_year
151+
SELECT id, member_features::jsonb AS features, gender, 'room' AS card_type, member_account.birth_year
138152
FROM shared_room_post
139153
JOIN feature_card ON shared_room_post.room_mate_card_id = feature_card.feature_card_id
140154
JOIN member_account ON member_account.member_id = shared_room_post.publisher_id
@@ -164,43 +178,41 @@ async def fetch_data():
164178
return user_male_cards, user_female_cards, post_male_cards, post_female_cards
165179

166180
def fill_missing_values(df):
    """Impute missing entries of ``df`` using the ``'mean'`` strategy.

    Prints a trace line, then fits a fresh ``SimpleImputer`` on ``df`` and
    returns the imputer's transformed output.
    """
    print("fill_missing_values")
    return SimpleImputer(strategy='mean').fit_transform(df)
169184

170185
def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_cards):
171186

172187
male_cards = [*user_male_cards, *post_male_cards]
173188
male_df = generate_df_data(male_cards)
189+
print(male_df)
174190

175191
female_cards = [*user_female_cards, *post_female_cards]
176192
female_df = generate_df_data(female_cards)
177193

178-
print("complete generate_df_data")
179-
194+
180195
# 여기의 fit 이 뭔데 값으로??
181196
# 결측값 우선 처리
182197

183198
male_cluster_model = DBSCAN(eps=0.2, min_samples=2)
184199
male_cluster_model.fit(
185-
convert_fit_data(fill_missing_values(male_df))
200+
convert_fit_data(male_df)
186201
)
187202

188-
print("male fit complete")
189-
190203
female_cluster_model = DBSCAN(eps=0.2, min_samples=2)
191204
female_cluster_model.fit(
192-
fill_missing_values(convert_fit_data(female_df))
205+
convert_fit_data(female_df)
193206
)
194-
195-
print("complete clustering")
196-
207+
197208
male_cluster = defaultdict(lambda: [])
198209

199210
find_male_user_cluster = defaultdict(lambda: {"my": None, "mate": None})
200211

201212
for index, cluster in enumerate(
202213
male_cluster_model.fit_predict(convert_fit_data(male_df))
203214
):
215+
print(cluster)
204216
card = male_cards[index]
205217

206218
male_cluster[cluster].append(male_cards[index])
@@ -223,6 +235,7 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
223235
male_recommendation_result = defaultdict(
224236
lambda: {"user": {"my": [], "mate": []}, "post": {"my": [], "mate": []}}
225237
)
238+
226239
for cluster, cluster_item in male_cluster.items():
227240
for i, card in enumerate(cluster_item):
228241
if card["card_type"] == "room":
@@ -233,8 +246,8 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
233246
for j, other_card in enumerate(cluster_item):
234247
if (
235248
i == j
236-
or card_type == other_card["card_type"]
237-
or user_id == other_card["id"]
249+
# or card_type == other_card["card_type"]
250+
# or user_id == other_card["id"]
238251
):
239252
continue
240253

@@ -269,8 +282,8 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
269282
for j, other_card in enumerate(cluster_item):
270283
if (
271284
i == j
272-
or card_type == other_card["card_type"]
273-
or user_id == other_card["id"]
285+
# or card_type == other_card["card_type"]
286+
# or user_id == other_card["id"]
274287
):
275288
continue
276289

@@ -292,13 +305,9 @@ def clustering(user_male_cards, user_female_cards, post_male_cards, post_female_
292305
}
293306
)
294307
print(male_recommendation_result)
295-
# recommendation_collection = recommendation_database.collection("recommendation")
296-
# for male_user_id, recommendation_result in male_recommendation_result.items():
297-
# doc_ref = recommendation_collection.document(f"{male_user_id}")
298-
# doc_ref.set(recommendation_result)
299-
# for female_user_id, recommendation_result in female_recommendation_result.items():
300-
# doc_ref = recommendation_collection.document(f"{female_user_id}")
301-
# doc_ref.set(recommendation_result)
308+
print(female_recommendation_result)
309+
310+
# 여기에 insert
302311

303312

304313
@app.get("/")
@@ -311,7 +320,31 @@ async def update():
311320
user_male_cards, user_female_cards, post_male_cards, post_female_cards = (
312321
await fetch_data()
313322
)
314-
print("fetch complete")
323+
315324
clustering(user_male_cards, user_female_cards, post_male_cards, post_female_cards)
316325
print("clustering complete")
326+
317327
return {"detail": "ok"}
328+
329+
@app.get("/fetch")
async def fetch():
    """Debug endpoint: print the female user cards and their feature frame.

    Loads the four card groups through ``fetch_data`` and prints the female
    user cards, first as fetched and then as processed by
    ``generate_df_data``. Nothing is returned to the client.
    """
    # fetch_data yields (user_male, user_female, post_male, post_female);
    # only the female user cards are inspected here.
    _, female_user_cards, _, _ = await fetch_data()
    print(female_user_cards)
    print(generate_df_data(female_user_cards))
342+
343+
@app.get("/insert")
async def insert():
    """Insert one hard-coded test row into the ``recommend`` table."""
    statement = """
    insert into recommend (id, user_id, card_type, recommendation_id, recommendation_card_type, score)
    values (2,'test', 'test', 'test', 'tset', 100)
    """

    await database.execute(statement)

migrations/README

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)