update

alexjungaalto · May 17, 2024 · 34c6bfe · 34c6bfe
1 parent 48a8f26
commit 34c6bfe
Show file tree

Hide file tree

Showing 25 changed files with 47,739 additions and 0 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/swenao24/Day1_ERMAnimation.ipynb b/swenao24/Day1_ERMAnimation.ipynb
diff --git a/swenao24/Day1_LinModelDifferentLoss.ipynb b/swenao24/Day1_LinModelDifferentLoss.ipynb
diff --git a/swenao24/Day1_MLMethods.pptx b/swenao24/Day1_MLMethods.pptx
diff --git a/swenao24/Day1_ModelSelDiagnosis.ipynb b/swenao24/Day1_ModelSelDiagnosis.ipynb
diff --git a/swenao24/Day1_ReadFMIDataModelSelection.ipynb b/swenao24/Day1_ReadFMIDataModelSelection.ipynb
diff --git a/swenao24/Day1_ReadInWikiData.ipynb b/swenao24/Day1_ReadInWikiData.ipynb
diff --git a/swenao24/Day1_forest_importances_faces.ipynb b/swenao24/Day1_forest_importances_faces.ipynb
diff --git a/swenao24/Day2_BayesNet.ipynb b/swenao24/Day2_BayesNet.ipynb
diff --git a/swenao24/Day2_FMINetwork.ipynb b/swenao24/Day2_FMINetwork.ipynb
diff --git a/swenao24/Day2_MLNetworks.pptx b/swenao24/Day2_MLNetworks.pptx
diff --git a/swenao24/Day2_NetworkVisualization.ipynb b/swenao24/Day2_NetworkVisualization.ipynb
diff --git a/swenao24/Day2_PGM.ipynb b/swenao24/Day2_PGM.ipynb
diff --git a/swenao24/Day2_SSLoverGraphs.ipynb b/swenao24/Day2_SSLoverGraphs.ipynb
diff --git a/swenao24/Day3_FMIPanel.ipynb b/swenao24/Day3_FMIPanel.ipynb
diff --git a/swenao24/Day3_MLwithPanels.pptx b/swenao24/Day3_MLwithPanels.pptx
diff --git a/swenao24/Day3_PanelData.ipynb b/swenao24/Day3_PanelData.ipynb
diff --git a/swenao24/Day3_PanelModels.ipynb b/swenao24/Day3_PanelModels.ipynb
diff --git a/swenao24/Day4_MLwitText.pptx b/swenao24/Day4_MLwitText.pptx
diff --git a/swenao24/FMIData.csv b/swenao24/FMIData.csv
diff --git a/swenao24/Ideas.tex b/swenao24/Ideas.tex
@@ -0,0 +1,86 @@
+Module 5 - Applied ML		(CET)
+
+===========================================================
+20.05.2024. (08:00 - 12:00) : ML with Python (Generic Data)  
+
+08:00 - 08:50 Live Demo: Empirical Risk Minimization 
+08:50 - 09:00 Break 
+09:00 - 09:50 Loss Functions for Linear Model (Sq. Error vs. HuberLoss vs LogReg and Multinom.LogReg) 
+09:50 - 10:00 Break 
+10:00 - 10:50 Reading in Data: FMI Weather Data, Wikidata
+10:50 - 11:00 Break 
+11:00 - 11:50 Demo: Model Diagnosis and Selection
+11:50 - 12:00 Wrap Up 
+
+
+** Pixel Importances for Face Recognition (Trustworthy AI): Compare LogReg with Trees 
+** 
+** how to turn any data into images ("visualisation") ; showcase geopandas
+** how to read in data from sql (Wikidata) 
+** 
+** Use Pre-Trained Models 
+** Multi-class classification 
+
+========================================================================
+21.05.2024 (08:00 - 12:00): Network Data 
+
+08:00 - 08:50 What is a Network? Examples for Network Data:  Weather Data; Documents 
+Illustrate how to model heterogeneous data, e.g., text documents that include reports of varying quality or of different types (labeled/not labeled). These 
+characteristics could be captured via suitable choices for the edges and edge weights; using networks for data visualization
+
+08:50 - 09:00 Break 
+09:00 - 09:50 Using Networks for Regularization: Semi-Supervised Learning (Document Classification with only a few labeled datapoints) 
+09:50 - 10:00 Break 
+10:00 - 10:50 Prob. Graphical Models 
+10:50 - 11:00 Break 
+11:00 - 11:50 Federated Learning
+11:50 - 12:00 Wrap Up  
+
+========================================================================
+22.05.2024 (08:00 - 12:00): Panel Data 
+
+08:00 - 08:50 Time Series Data 
+08:50 - 09:00 Break 
+09:00 - 09:50 Weather Time Series 
+09:50 - 10:00 Break 
+10:00 - 10:50 Panels - A Network of Time Series 
+10:50 - 11:00 Break 
+11:00 - 11:50 Weather Panel 
+11:50 - 12:00 Wrap-Up
+
+
+========================================================================
+23.05.2024 (08:00 - 12:00): Text Data 
+
+08:00 - 08:50 From Tokens to Embeddings 
+08:50 - 09:00 Break
+09:00 - 09:50 Supervised and Unsupervised NLP   
+09:50 - 10:00 Break 
+10:00 - 10:50 Regularization 
+10:50 - 11:00 Break 
+11:00 - 11:50 Using Pre-Trained LLMs
+11:50 - 12:00 Wrap-Up 
+
+22.05.2024 (8.00-12.00): Panel Data 
+
+* Belief networks; causality graphs (time series data) 
+
+23.05.2024 (8.00-12.00): Text Data 
+
+** Reading in Data with pandas, text data 
+
+
+* network visualization (third country students) 
+* feature importance/handling NANs  with decision trees (third country student) 
+* NLP (finding most important paragraph in a document); feature embedding for a document (pdf2vec)
+
+
+
+ https://aaltoee-2024.paas.datacenter.fi/. You can use one of the Python assistant's credentials for the view: 
+
+username: may-assari8b95
+password: 2024	
+
+
+
+
diff --git a/swenao24/MultiLayerNetwork.py b/swenao24/MultiLayerNetwork.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed May 15 20:51:30 2024
+
+@author: junga1
+"""
+
+import matplotlib.pyplot as plt
+import networkx as nx
+from networkx.algorithms import bipartite
+
+# Build the multilayer network. A plain undirected nx.Graph is used for
+# simplicity; layer membership is stored in each node's 'layer' attribute.
+G = nx.Graph()
+
+# Nodes of layer 1
+layer1 = ['A1', 'B1', 'C1', 'D1']
+G.add_nodes_from(layer1, layer='Employee')
+
+# Nodes of layer 2
+layer2 = ['A2', 'B2', 'C2', 'D2']
+G.add_nodes_from(layer2, layer='Manager')
+
+# Edges within layer 1: the chain A1 - B1 - C1 - D1. Both endpoints of every
+# edge in this list must be layer-1 nodes; cross-layer links are added
+# separately in the next statement.
+G.add_edges_from([('A1', 'B1'), ('B1', 'C1'), ('C1', 'D1')])
+
+# Edges between layers: link each layer-1 node to the layer-2 node at the
+# same list position (A1-A2, B1-B2, C1-C2, D1-D2).
+G.add_edges_from(zip(layer1, layer2))
+
+# Node positions for plotting: one vertical column per layer, nodes spaced
+# 10 units apart in list order.
+pos = {node: (1, index * 10) for index, node in enumerate(layer1)}  # Layer 1 at x=1
+pos.update((node, (2, index * 10)) for index, node in enumerate(layer2))  # Layer 2 at x=2
+
+# Nodes, coloured by layer
+nx.draw_networkx_nodes(G, pos, nodelist=layer1, node_color='lightblue')
+nx.draw_networkx_nodes(G, pos, nodelist=layer2, node_color='lightgreen')
+
+# Edges
+nx.draw_networkx_edges(G, pos, edgelist=G.edges, style='dotted', alpha=0.5)
+
+# Labels: every node is labelled with its own identifier
+labels = {node: node for node in G.nodes()}
+nx.draw_networkx_labels(G, pos, labels=labels)
+
+# Layer captions, placed 5 units above the topmost node of each column
+plt.text(1, max(pos[node][1] for node in layer1) + 5, 'Layer 1', horizontalalignment='center')
+plt.text(2, max(pos[node][1] for node in layer2) + 5, 'Layer 2', horizontalalignment='center')
+
+
+plt.title("Simple Multilayer Network")
+plt.axis('off')  # Turn off the axis
+plt.show()
diff --git a/swenao24/USPresidGDPWikiData.py b/swenao24/USPresidGDPWikiData.py
@@ -0,0 +1,76 @@
+import requests
+import datetime
+import pandas as pd
+
+def query_wikidata(sparql):
+    """Run a SPARQL query against the Wikidata Query Service.
+
+    Args:
+        sparql: The SPARQL query text.
+
+    Returns:
+        The decoded JSON response body.
+
+    Raises:
+        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
+        requests.Timeout: If no response arrives within the timeout.
+    """
+    url = "https://query.wikidata.org/sparql"
+    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'}
+    # requests never times out by default; bound the wait so a stalled
+    # connection cannot hang the whole script.
+    response = requests.get(
+        url,
+        headers=headers,
+        params={'format': 'json', 'query': sparql},
+        timeout=60,
+    )
+    # Fail with a clear HTTP error (e.g. on rate limiting) instead of a
+    # confusing JSON decode error on a non-JSON error page.
+    response.raise_for_status()
+    return response.json()
+
+def get_us_presidents(last_n_years=80):
+    """Query Wikidata for holders of the office wd:Q11696 with recent terms.
+
+    Selects every item that has a P39 statement for wd:Q11696 with a P580
+    qualifier (bound to ?startTerm) whose year is not earlier than the
+    current year minus ``last_n_years``. The P582 qualifier (?endTerm) is
+    optional, so a row may lack it. Rows are ordered by ?startTerm.
+
+    Args:
+        last_n_years: How many years back from the current year the start
+            of a term may lie.
+
+    Returns:
+        Whatever ``query_wikidata`` returns for the query.
+    """
+    start_year = datetime.datetime.now().year - last_n_years
+
+    # Literal SPARQL braces are doubled because this is an f-string.
+    sparql = f"""
+    SELECT ?president ?presidentLabel ?startTerm ?endTerm WHERE {{
+        ?president p:P39 ?statement.
+        ?statement ps:P39 wd:Q11696; pq:P580 ?startTerm.
+        OPTIONAL {{?statement pq:P582 ?endTerm.}}
+        FILTER(YEAR(?startTerm) >= {start_year})
+        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
+    }}
+    ORDER BY ?startTerm
+    """
+    return query_wikidata(sparql)
+
+def get_us_economic_data(year, property_code):
+    """Look up one economic indicator of the United States for a given year.
+
+    Args:
+        year: Calendar year to look up (int or numeric string).
+        property_code: Wikidata property ID of the indicator; callers in
+            this file pass "P2131" (GDP) and "P1198" (unemployment rate).
+
+    Returns:
+        The value as the string delivered by the endpoint, or None when no
+        positive value dated to that year exists.
+    """
+    # The subject is pinned to wd:Q30 (United States of America). With an
+    # unbound subject the pattern matches every entity that has the property,
+    # and LIMIT 1 then returns an arbitrary entity's figure.
+    # Value (ps:) and date (pq:P585) are read from the same statement node so
+    # the returned value is the one actually dated to the requested year.
+    sparql = f"""
+    SELECT ?year ?value WHERE {{
+        wd:Q30 p:{property_code} ?statement.
+        ?statement ps:{property_code} ?value;
+                   pq:P585 ?date.
+        BIND(YEAR(?date) AS ?year)
+        FILTER(?year = {year})
+        FILTER(?value > 0)
+    }}
+    ORDER BY DESC(?date)
+    LIMIT 1
+    """
+    results = query_wikidata(sparql)
+    bindings = results.get('results', {}).get('bindings', [])
+    if not bindings:
+        return None
+    return bindings[0]['value']['value']
+
+def main():
+    """Build a per-president table of economic figures and save it as CSV.
+
+    For every row returned by ``get_us_presidents`` the GDP (P2131) and
+    unemployment rate (P1198) are looked up for the first and last year of
+    the term. The table is printed and written to
+    ``us_presidents_economic_data.csv`` in the working directory.
+    """
+    presidents_data = get_us_presidents()
+    # Years are handled as 4-character strings throughout, so the fallback
+    # for a term without an end date is a string as well (previously an int,
+    # which mixed types within the "End Year" column).
+    current_year = str(datetime.datetime.now().year)
+
+    # One president's end year is usually the next one's start year, so each
+    # (year, property) pair is fetched only once per run.
+    cache = {}
+
+    def lookup(year, property_code):
+        key = (year, property_code)
+        if key not in cache:
+            cache[key] = get_us_economic_data(year, property_code)
+        return cache[key]
+
+    data_list = []
+    for president in presidents_data['results']['bindings']:
+        name = president['presidentLabel']['value']
+        # Term dates arrive as ISO timestamps; the first four characters are the year.
+        start_year = president['startTerm']['value'][:4]
+        end_year = president['endTerm']['value'][:4] if 'endTerm' in president else current_year
+
+        data_list.append({
+            "President": name,
+            "Start Year": start_year,
+            "Start GDP": lookup(start_year, "P2131"),  # GDP
+            "End Year": end_year,
+            "End GDP": lookup(end_year, "P2131"),
+            "Start Unemployment Rate": lookup(start_year, "P1198"),  # Unemployment rate
+            "End Unemployment Rate": lookup(end_year, "P1198"),
+        })
+
+    df = pd.DataFrame(data_list)
+    print(df)
+    df.to_csv("us_presidents_economic_data.csv", index=False)
+
+if __name__ == "__main__":
+    main()
+
diff --git a/swenao24/finland_municipalities.csv b/swenao24/finland_municipalities.csv
@@ -0,0 +1,2 @@
+municipality.type,municipality.value,municipalityLabel.xml:lang,municipalityLabel.type,municipalityLabel.value
+uri,http://www.wikidata.org/entity/Q115988085,en,literal,non-permanent members of the United Nations Security Council
diff --git a/swenao24/us_presidents_economic_data.csv b/swenao24/us_presidents_economic_data.csv
@@ -0,0 +1,16 @@
+President,Start Year,Start GDP,End Year,End GDP,Start Unemployment Rate,End Unemployment Rate
+Harry S. Truman,1945,,1953,,,
+Dwight D. Eisenhower,1953,,1961,23909289978.5861,,
+John F. Kennedy,1961,23909289978.5861,1963,9136689514.09479,,
+Lyndon B. Johnson,1963,9136689514.09479,1969,23909289978.5861,,
+Richard Nixon,1969,23909289978.5861,1974,75931656814.657,,8.4
+Gerald Ford,1974,75931656814.657,1977,3012914131.16971,8.4,8.4
+Jimmy Carter,1977,3012914131.16971,1981,3012914131.16971,8.4,8.4
+Ronald Reagan,1981,3012914131.16971,1989,3012914131.16971,8.4,3
+George H. W. Bush,1989,3012914131.16971,1993,13039352743.9616,3,8.9
+Bill Clinton,1993,13039352743.9616,2001,3012914131.16971,8.9,8.9
+George W. Bush,2001,3012914131.16971,2009,3012914131.16971,8.9,8.9
+Barack Obama,2009,3012914131.16971,2017,23909289978.5861,8.9,16
+Donald Trump,2017,23909289978.5861,2021,14583135237,16,30
+Phil Baker,2018,238308749,2024,,5.6,6.1
+Joe Biden,2021,14583135237,2024,,30,6.1
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		municipality.type,municipality.value,municipalityLabel.xml:lang,municipalityLabel.type,municipalityLabel.value
		uri,http://www.wikidata.org/entity/Q115988085,en,literal,non-permanent members of the United Nations Security Council