diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..b587bbb --- /dev/null +++ b/404.html @@ -0,0 +1 @@ + Python code studies
\ No newline at end of file diff --git a/assets/images/favicon.png b/assets/images/favicon.png new file mode 100644 index 0000000..1cf13b9 Binary files /dev/null and b/assets/images/favicon.png differ diff --git a/assets/javascripts/bundle.51d95adb.min.js b/assets/javascripts/bundle.51d95adb.min.js new file mode 100644 index 0000000..b20ec68 --- /dev/null +++ b/assets/javascripts/bundle.51d95adb.min.js @@ -0,0 +1,29 @@ +"use strict";(()=>{var Hi=Object.create;var xr=Object.defineProperty;var Pi=Object.getOwnPropertyDescriptor;var $i=Object.getOwnPropertyNames,kt=Object.getOwnPropertySymbols,Ii=Object.getPrototypeOf,Er=Object.prototype.hasOwnProperty,an=Object.prototype.propertyIsEnumerable;var on=(e,t,r)=>t in e?xr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,P=(e,t)=>{for(var r in t||(t={}))Er.call(t,r)&&on(e,r,t[r]);if(kt)for(var r of kt(t))an.call(t,r)&&on(e,r,t[r]);return e};var sn=(e,t)=>{var r={};for(var n in e)Er.call(e,n)&&t.indexOf(n)<0&&(r[n]=e[n]);if(e!=null&&kt)for(var n of kt(e))t.indexOf(n)<0&&an.call(e,n)&&(r[n]=e[n]);return r};var Ht=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var Fi=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of $i(t))!Er.call(e,o)&&o!==r&&xr(e,o,{get:()=>t[o],enumerable:!(n=Pi(t,o))||n.enumerable});return e};var yt=(e,t,r)=>(r=e!=null?Hi(Ii(e)):{},Fi(t||!e||!e.__esModule?xr(r,"default",{value:e,enumerable:!0}):r,e));var fn=Ht((wr,cn)=>{(function(e,t){typeof wr=="object"&&typeof cn!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(wr,function(){"use strict";function e(r){var n=!0,o=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function s(T){return!!(T&&T!==document&&T.nodeName!=="HTML"&&T.nodeName!=="BODY"&&"classList"in T&&"contains"in T.classList)}function f(T){var 
Ke=T.type,We=T.tagName;return!!(We==="INPUT"&&a[Ke]&&!T.readOnly||We==="TEXTAREA"&&!T.readOnly||T.isContentEditable)}function c(T){T.classList.contains("focus-visible")||(T.classList.add("focus-visible"),T.setAttribute("data-focus-visible-added",""))}function u(T){T.hasAttribute("data-focus-visible-added")&&(T.classList.remove("focus-visible"),T.removeAttribute("data-focus-visible-added"))}function p(T){T.metaKey||T.altKey||T.ctrlKey||(s(r.activeElement)&&c(r.activeElement),n=!0)}function m(T){n=!1}function d(T){s(T.target)&&(n||f(T.target))&&c(T.target)}function h(T){s(T.target)&&(T.target.classList.contains("focus-visible")||T.target.hasAttribute("data-focus-visible-added"))&&(o=!0,window.clearTimeout(i),i=window.setTimeout(function(){o=!1},100),u(T.target))}function v(T){document.visibilityState==="hidden"&&(o&&(n=!0),B())}function B(){document.addEventListener("mousemove",z),document.addEventListener("mousedown",z),document.addEventListener("mouseup",z),document.addEventListener("pointermove",z),document.addEventListener("pointerdown",z),document.addEventListener("pointerup",z),document.addEventListener("touchmove",z),document.addEventListener("touchstart",z),document.addEventListener("touchend",z)}function re(){document.removeEventListener("mousemove",z),document.removeEventListener("mousedown",z),document.removeEventListener("mouseup",z),document.removeEventListener("pointermove",z),document.removeEventListener("pointerdown",z),document.removeEventListener("pointerup",z),document.removeEventListener("touchmove",z),document.removeEventListener("touchstart",z),document.removeEventListener("touchend",z)}function 
z(T){T.target.nodeName&&T.target.nodeName.toLowerCase()==="html"||(n=!1,re())}document.addEventListener("keydown",p,!0),document.addEventListener("mousedown",m,!0),document.addEventListener("pointerdown",m,!0),document.addEventListener("touchstart",m,!0),document.addEventListener("visibilitychange",v,!0),B(),r.addEventListener("focus",d,!0),r.addEventListener("blur",h,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var un=Ht(Sr=>{(function(e){var t=function(){try{return!!Symbol.iterator}catch(c){return!1}},r=t(),n=function(c){var u={next:function(){var p=c.shift();return{done:p===void 0,value:p}}};return r&&(u[Symbol.iterator]=function(){return u}),u},o=function(c){return encodeURIComponent(c).replace(/%20/g,"+")},i=function(c){return decodeURIComponent(String(c).replace(/\+/g," "))},a=function(){var c=function(p){Object.defineProperty(this,"_entries",{writable:!0,value:{}});var m=typeof p;if(m!=="undefined")if(m==="string")p!==""&&this._fromString(p);else if(p instanceof c){var d=this;p.forEach(function(re,z){d.append(z,re)})}else if(p!==null&&m==="object")if(Object.prototype.toString.call(p)==="[object Array]")for(var h=0;hd[0]?1:0}),c._entries&&(c._entries={});for(var p=0;p1?i(d[1]):"")}})})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Sr);(function(e){var t=function(){try{var o=new e.URL("b","http://a");return o.pathname="c 
d",o.href==="http://a/c%20d"&&o.searchParams}catch(i){return!1}},r=function(){var o=e.URL,i=function(f,c){typeof f!="string"&&(f=String(f)),c&&typeof c!="string"&&(c=String(c));var u=document,p;if(c&&(e.location===void 0||c!==e.location.href)){c=c.toLowerCase(),u=document.implementation.createHTMLDocument(""),p=u.createElement("base"),p.href=c,u.head.appendChild(p);try{if(p.href.indexOf(c)!==0)throw new Error(p.href)}catch(T){throw new Error("URL unable to set base "+c+" due to "+T)}}var m=u.createElement("a");m.href=f,p&&(u.body.appendChild(m),m.href=m.href);var d=u.createElement("input");if(d.type="url",d.value=f,m.protocol===":"||!/:/.test(m.href)||!d.checkValidity()&&!c)throw new TypeError("Invalid URL");Object.defineProperty(this,"_anchorElement",{value:m});var h=new e.URLSearchParams(this.search),v=!0,B=!0,re=this;["append","delete","set"].forEach(function(T){var Ke=h[T];h[T]=function(){Ke.apply(h,arguments),v&&(B=!1,re.search=h.toString(),B=!0)}}),Object.defineProperty(this,"searchParams",{value:h,enumerable:!0});var z=void 0;Object.defineProperty(this,"_updateSearchParams",{enumerable:!1,configurable:!1,writable:!1,value:function(){this.search!==z&&(z=this.search,B&&(v=!1,this.searchParams._fromString(this.search),v=!0))}})},a=i.prototype,s=function(f){Object.defineProperty(a,f,{get:function(){return this._anchorElement[f]},set:function(c){this._anchorElement[f]=c},enumerable:!0})};["hash","host","hostname","port","protocol"].forEach(function(f){s(f)}),Object.defineProperty(a,"search",{get:function(){return this._anchorElement.search},set:function(f){this._anchorElement.search=f,this._updateSearchParams()},enumerable:!0}),Object.defineProperties(a,{toString:{get:function(){var f=this;return function(){return f.href}}},href:{get:function(){return this._anchorElement.href.replace(/\?$/,"")},set:function(f){this._anchorElement.href=f,this._updateSearchParams()},enumerable:!0},pathname:{get:function(){return 
this._anchorElement.pathname.replace(/(^\/?)/,"/")},set:function(f){this._anchorElement.pathname=f},enumerable:!0},origin:{get:function(){var f={"http:":80,"https:":443,"ftp:":21}[this._anchorElement.protocol],c=this._anchorElement.port!=f&&this._anchorElement.port!=="";return this._anchorElement.protocol+"//"+this._anchorElement.hostname+(c?":"+this._anchorElement.port:"")},enumerable:!0},password:{get:function(){return""},set:function(f){},enumerable:!0},username:{get:function(){return""},set:function(f){},enumerable:!0}}),i.createObjectURL=function(f){return o.createObjectURL.apply(o,arguments)},i.revokeObjectURL=function(f){return o.revokeObjectURL.apply(o,arguments)},e.URL=i};if(t()||r(),e.location!==void 0&&!("origin"in e.location)){var n=function(){return e.location.protocol+"//"+e.location.hostname+(e.location.port?":"+e.location.port:"")};try{Object.defineProperty(e.location,"origin",{get:n,enumerable:!0})}catch(o){setInterval(function(){e.location.origin=n()},100)}}})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Sr)});var Qr=Ht((Lt,Kr)=>{/*! 
+ * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */(function(t,r){typeof Lt=="object"&&typeof Kr=="object"?Kr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof Lt=="object"?Lt.ClipboardJS=r():t.ClipboardJS=r()})(Lt,function(){return function(){var e={686:function(n,o,i){"use strict";i.d(o,{default:function(){return ki}});var a=i(279),s=i.n(a),f=i(370),c=i.n(f),u=i(817),p=i.n(u);function m(j){try{return document.execCommand(j)}catch(O){return!1}}var d=function(O){var w=p()(O);return m("cut"),w},h=d;function v(j){var O=document.documentElement.getAttribute("dir")==="rtl",w=document.createElement("textarea");w.style.fontSize="12pt",w.style.border="0",w.style.padding="0",w.style.margin="0",w.style.position="absolute",w.style[O?"right":"left"]="-9999px";var k=window.pageYOffset||document.documentElement.scrollTop;return w.style.top="".concat(k,"px"),w.setAttribute("readonly",""),w.value=j,w}var B=function(O,w){var k=v(O);w.container.appendChild(k);var F=p()(k);return m("copy"),k.remove(),F},re=function(O){var w=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},k="";return typeof O=="string"?k=B(O,w):O instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(O==null?void 0:O.type)?k=B(O.value,w):(k=p()(O),m("copy")),k},z=re;function T(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?T=function(w){return typeof w}:T=function(w){return w&&typeof Symbol=="function"&&w.constructor===Symbol&&w!==Symbol.prototype?"symbol":typeof w},T(j)}var Ke=function(){var O=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},w=O.action,k=w===void 0?"copy":w,F=O.container,q=O.target,Le=O.text;if(k!=="copy"&&k!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(q!==void 0)if(q&&T(q)==="object"&&q.nodeType===1){if(k==="copy"&&q.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. 
Please use "readonly" instead of "disabled" attribute');if(k==="cut"&&(q.hasAttribute("readonly")||q.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(Le)return z(Le,{container:F});if(q)return k==="cut"?h(q):z(q,{container:F})},We=Ke;function Ie(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Ie=function(w){return typeof w}:Ie=function(w){return w&&typeof Symbol=="function"&&w.constructor===Symbol&&w!==Symbol.prototype?"symbol":typeof w},Ie(j)}function Ti(j,O){if(!(j instanceof O))throw new TypeError("Cannot call a class as a function")}function nn(j,O){for(var w=0;w0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof F.action=="function"?F.action:this.defaultAction,this.target=typeof F.target=="function"?F.target:this.defaultTarget,this.text=typeof F.text=="function"?F.text:this.defaultText,this.container=Ie(F.container)==="object"?F.container:document.body}},{key:"listenClick",value:function(F){var q=this;this.listener=c()(F,"click",function(Le){return q.onClick(Le)})}},{key:"onClick",value:function(F){var q=F.delegateTarget||F.currentTarget,Le=this.action(q)||"copy",Rt=We({action:Le,container:this.container,target:this.target(q),text:this.text(q)});this.emit(Rt?"success":"error",{action:Le,text:Rt,trigger:q,clearSelection:function(){q&&q.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(F){return yr("action",F)}},{key:"defaultTarget",value:function(F){var q=yr("target",F);if(q)return document.querySelector(q)}},{key:"defaultText",value:function(F){return yr("text",F)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(F){var q=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return z(F,q)}},{key:"cut",value:function(F){return 
h(F)}},{key:"isSupported",value:function(){var F=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],q=typeof F=="string"?[F]:F,Le=!!document.queryCommandSupported;return q.forEach(function(Rt){Le=Le&&!!document.queryCommandSupported(Rt)}),Le}}]),w}(s()),ki=Ri},828:function(n){var o=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,f){for(;s&&s.nodeType!==o;){if(typeof s.matches=="function"&&s.matches(f))return s;s=s.parentNode}}n.exports=a},438:function(n,o,i){var a=i(828);function s(u,p,m,d,h){var v=c.apply(this,arguments);return u.addEventListener(m,v,h),{destroy:function(){u.removeEventListener(m,v,h)}}}function f(u,p,m,d,h){return typeof u.addEventListener=="function"?s.apply(null,arguments):typeof m=="function"?s.bind(null,document).apply(null,arguments):(typeof u=="string"&&(u=document.querySelectorAll(u)),Array.prototype.map.call(u,function(v){return s(v,p,m,d,h)}))}function c(u,p,m,d){return function(h){h.delegateTarget=a(h.target,p),h.delegateTarget&&d.call(u,h)}}n.exports=f},879:function(n,o){o.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},o.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||o.node(i[0]))},o.string=function(i){return typeof i=="string"||i instanceof String},o.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(n,o,i){var a=i(879),s=i(438);function f(m,d,h){if(!m&&!d&&!h)throw new Error("Missing required arguments");if(!a.string(d))throw new TypeError("Second argument must be a String");if(!a.fn(h))throw new TypeError("Third argument must be a Function");if(a.node(m))return c(m,d,h);if(a.nodeList(m))return u(m,d,h);if(a.string(m))return p(m,d,h);throw new 
TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(m,d,h){return m.addEventListener(d,h),{destroy:function(){m.removeEventListener(d,h)}}}function u(m,d,h){return Array.prototype.forEach.call(m,function(v){v.addEventListener(d,h)}),{destroy:function(){Array.prototype.forEach.call(m,function(v){v.removeEventListener(d,h)})}}}function p(m,d,h){return s(document.body,m,d,h)}n.exports=f},817:function(n){function o(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var f=window.getSelection(),c=document.createRange();c.selectNodeContents(i),f.removeAllRanges(),f.addRange(c),a=f.toString()}return a}n.exports=o},279:function(n){function o(){}o.prototype={on:function(i,a,s){var f=this.e||(this.e={});return(f[i]||(f[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var f=this;function c(){f.off(i,c),a.apply(s,arguments)}return c._=a,this.on(i,c,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),f=0,c=s.length;for(f;f{"use strict";/*! 
+ * escape-html + * Copyright(c) 2012-2013 TJ Holowaychuk + * Copyright(c) 2015 Andreas Lubbe + * Copyright(c) 2015 Tiancheng "Timothy" Gu + * MIT Licensed + */var is=/["'&<>]/;Jo.exports=as;function as(e){var t=""+e,r=is.exec(t);if(!r)return t;var n,o="",i=0,a=0;for(i=r.index;i0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[n++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function W(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var n=r.call(e),o,i=[],a;try{for(;(t===void 0||t-- >0)&&!(o=n.next()).done;)i.push(o.value)}catch(s){a={error:s}}finally{try{o&&!o.done&&(r=n.return)&&r.call(n)}finally{if(a)throw a.error}}return i}function D(e,t,r){if(r||arguments.length===2)for(var n=0,o=t.length,i;n1||s(m,d)})})}function s(m,d){try{f(n[m](d))}catch(h){p(i[0][3],h)}}function f(m){m.value instanceof Xe?Promise.resolve(m.value.v).then(c,u):p(i[0][2],m)}function c(m){s("next",m)}function u(m){s("throw",m)}function p(m,d){m(d),i.shift(),i.length&&s(i[0][0],i[0][1])}}function mn(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof xe=="function"?xe(e):e[Symbol.iterator](),r={},n("next"),n("throw"),n("return"),r[Symbol.asyncIterator]=function(){return this},r);function n(i){r[i]=e[i]&&function(a){return new Promise(function(s,f){a=e[i](a),o(s,f,a.done,a.value)})}}function o(i,a,s,f){Promise.resolve(f).then(function(c){i({value:c,done:s})},a)}}function A(e){return typeof e=="function"}function at(e){var t=function(n){Error.call(n),n.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var $t=at(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: +`+r.map(function(n,o){return o+1+") "+n.toString()}).join(` + 
`):"",this.name="UnsubscriptionError",this.errors=r}});function De(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Fe=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,n,o,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=xe(a),f=s.next();!f.done;f=s.next()){var c=f.value;c.remove(this)}}catch(v){t={error:v}}finally{try{f&&!f.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var u=this.initialTeardown;if(A(u))try{u()}catch(v){i=v instanceof $t?v.errors:[v]}var p=this._finalizers;if(p){this._finalizers=null;try{for(var m=xe(p),d=m.next();!d.done;d=m.next()){var h=d.value;try{dn(h)}catch(v){i=i!=null?i:[],v instanceof $t?i=D(D([],W(i)),W(v.errors)):i.push(v)}}}catch(v){n={error:v}}finally{try{d&&!d.done&&(o=m.return)&&o.call(m)}finally{if(n)throw n.error}}}if(i)throw new $t(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)dn(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&De(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&De(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Or=Fe.EMPTY;function It(e){return e instanceof Fe||e&&"closed"in e&&A(e.remove)&&A(e.add)&&A(e.unsubscribe)}function dn(e){A(e)?e():e.unsubscribe()}var Ae={onUnhandledError:null,onStoppedNotification:null,Promise:void 
0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var st={setTimeout:function(e,t){for(var r=[],n=2;n0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var n=this,o=this,i=o.hasError,a=o.isStopped,s=o.observers;return i||a?Or:(this.currentObservers=null,s.push(r),new Fe(function(){n.currentObservers=null,De(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var n=this,o=n.hasError,i=n.thrownError,a=n.isStopped;o?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new U;return r.source=this,r},t.create=function(r,n){return new wn(r,n)},t}(U);var wn=function(e){ne(t,e);function t(r,n){var o=e.call(this)||this;return o.destination=r,o.source=n,o}return t.prototype.next=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.next)===null||o===void 0||o.call(n,r)},t.prototype.error=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.error)===null||o===void 0||o.call(n,r)},t.prototype.complete=function(){var r,n;(n=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||n===void 0||n.call(r)},t.prototype._subscribe=function(r){var n,o;return(o=(n=this.source)===null||n===void 0?void 0:n.subscribe(r))!==null&&o!==void 0?o:Or},t}(E);var Et={now:function(){return(Et.delegate||Date).now()},delegate:void 0};var wt=function(e){ne(t,e);function t(r,n,o){r===void 0&&(r=1/0),n===void 0&&(n=1/0),o===void 0&&(o=Et);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=n,i._timestampProvider=o,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=n===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,n),i}return t.prototype.next=function(r){var 
n=this,o=n.isStopped,i=n._buffer,a=n._infiniteTimeWindow,s=n._timestampProvider,f=n._windowTime;o||(i.push(r),!a&&i.push(s.now()+f)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var n=this._innerSubscribe(r),o=this,i=o._infiniteTimeWindow,a=o._buffer,s=a.slice(),f=0;f0?e.prototype.requestAsyncId.call(this,r,n,o):(r.actions.push(this),r._scheduled||(r._scheduled=ut.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,n,o){var i;if(o===void 0&&(o=0),o!=null?o>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,n,o);var a=r.actions;n!=null&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==n&&(ut.cancelAnimationFrame(n),r._scheduled=void 0)},t}(Ut);var On=function(e){ne(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var n=this._scheduled;this._scheduled=void 0;var o=this.actions,i;r=r||o.shift();do if(i=r.execute(r.state,r.delay))break;while((r=o[0])&&r.id===n&&o.shift());if(this._active=!1,i){for(;(r=o[0])&&r.id===n&&o.shift();)r.unsubscribe();throw i}},t}(Wt);var we=new On(Tn);var R=new U(function(e){return e.complete()});function Dt(e){return e&&A(e.schedule)}function kr(e){return e[e.length-1]}function Qe(e){return A(kr(e))?e.pop():void 0}function Se(e){return Dt(kr(e))?e.pop():void 0}function Vt(e,t){return typeof kr(e)=="number"?e.pop():t}var pt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function zt(e){return A(e==null?void 0:e.then)}function Nt(e){return A(e[ft])}function qt(e){return Symbol.asyncIterator&&A(e==null?void 0:e[Symbol.asyncIterator])}function Kt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Ki(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Qt=Ki();function Yt(e){return A(e==null?void 0:e[Qt])}function Gt(e){return ln(this,arguments,function(){var r,n,o,i;return Pt(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,Xe(r.read())];case 3:return n=a.sent(),o=n.value,i=n.done,i?[4,Xe(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,Xe(o)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function Bt(e){return A(e==null?void 0:e.getReader)}function $(e){if(e instanceof U)return e;if(e!=null){if(Nt(e))return Qi(e);if(pt(e))return Yi(e);if(zt(e))return Gi(e);if(qt(e))return _n(e);if(Yt(e))return Bi(e);if(Bt(e))return Ji(e)}throw Kt(e)}function Qi(e){return new U(function(t){var r=e[ft]();if(A(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function Yi(e){return new U(function(t){for(var r=0;r=2;return function(n){return n.pipe(e?_(function(o,i){return e(o,i,n)}):me,Oe(1),r?He(t):zn(function(){return new Xt}))}}function Nn(){for(var e=[],t=0;t=2,!0))}function fe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new E}:t,n=e.resetOnError,o=n===void 0?!0:n,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,f=s===void 0?!0:s;return function(c){var u,p,m,d=0,h=!1,v=!1,B=function(){p==null||p.unsubscribe(),p=void 0},re=function(){B(),u=m=void 0,h=v=!1},z=function(){var T=u;re(),T==null||T.unsubscribe()};return g(function(T,Ke){d++,!v&&!h&&B();var We=m=m!=null?m:r();Ke.add(function(){d--,d===0&&!v&&!h&&(p=jr(z,f))}),We.subscribe(Ke),!u&&d>0&&(u=new et({next:function(Ie){return 
We.next(Ie)},error:function(Ie){v=!0,B(),p=jr(re,o,Ie),We.error(Ie)},complete:function(){h=!0,B(),p=jr(re,a),We.complete()}}),$(T).subscribe(u))})(c)}}function jr(e,t){for(var r=[],n=2;ne.next(document)),e}function K(e,t=document){return Array.from(t.querySelectorAll(e))}function V(e,t=document){let r=se(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function se(e,t=document){return t.querySelector(e)||void 0}function _e(){return document.activeElement instanceof HTMLElement&&document.activeElement||void 0}function tr(e){return L(b(document.body,"focusin"),b(document.body,"focusout")).pipe(ke(1),l(()=>{let t=_e();return typeof t!="undefined"?e.contains(t):!1}),N(e===_e()),Y())}function Be(e){return{x:e.offsetLeft,y:e.offsetTop}}function Yn(e){return L(b(window,"load"),b(window,"resize")).pipe(Ce(0,we),l(()=>Be(e)),N(Be(e)))}function rr(e){return{x:e.scrollLeft,y:e.scrollTop}}function dt(e){return L(b(e,"scroll"),b(window,"resize")).pipe(Ce(0,we),l(()=>rr(e)),N(rr(e)))}var Bn=function(){if(typeof Map!="undefined")return Map;function e(t,r){var n=-1;return t.some(function(o,i){return o[0]===r?(n=i,!0):!1}),n}return function(){function t(){this.__entries__=[]}return Object.defineProperty(t.prototype,"size",{get:function(){return this.__entries__.length},enumerable:!0,configurable:!0}),t.prototype.get=function(r){var n=e(this.__entries__,r),o=this.__entries__[n];return o&&o[1]},t.prototype.set=function(r,n){var o=e(this.__entries__,r);~o?this.__entries__[o][1]=n:this.__entries__.push([r,n])},t.prototype.delete=function(r){var n=this.__entries__,o=e(n,r);~o&&n.splice(o,1)},t.prototype.has=function(r){return!!~e(this.__entries__,r)},t.prototype.clear=function(){this.__entries__.splice(0)},t.prototype.forEach=function(r,n){n===void 0&&(n=null);for(var 
o=0,i=this.__entries__;o0},e.prototype.connect_=function(){!zr||this.connected_||(document.addEventListener("transitionend",this.onTransitionEnd_),window.addEventListener("resize",this.refresh),xa?(this.mutationsObserver_=new MutationObserver(this.refresh),this.mutationsObserver_.observe(document,{attributes:!0,childList:!0,characterData:!0,subtree:!0})):(document.addEventListener("DOMSubtreeModified",this.refresh),this.mutationEventsAdded_=!0),this.connected_=!0)},e.prototype.disconnect_=function(){!zr||!this.connected_||(document.removeEventListener("transitionend",this.onTransitionEnd_),window.removeEventListener("resize",this.refresh),this.mutationsObserver_&&this.mutationsObserver_.disconnect(),this.mutationEventsAdded_&&document.removeEventListener("DOMSubtreeModified",this.refresh),this.mutationsObserver_=null,this.mutationEventsAdded_=!1,this.connected_=!1)},e.prototype.onTransitionEnd_=function(t){var r=t.propertyName,n=r===void 0?"":r,o=ya.some(function(i){return!!~n.indexOf(i)});o&&this.refresh()},e.getInstance=function(){return this.instance_||(this.instance_=new e),this.instance_},e.instance_=null,e}(),Jn=function(e,t){for(var r=0,n=Object.keys(t);r0},e}(),Zn=typeof WeakMap!="undefined"?new WeakMap:new Bn,eo=function(){function e(t){if(!(this instanceof e))throw new TypeError("Cannot call a class as a function.");if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");var r=Ea.getInstance(),n=new Ra(t,r,this);Zn.set(this,n)}return e}();["observe","unobserve","disconnect"].forEach(function(e){eo.prototype[e]=function(){var t;return(t=Zn.get(this))[e].apply(t,arguments)}});var ka=function(){return typeof nr.ResizeObserver!="undefined"?nr.ResizeObserver:eo}(),to=ka;var ro=new E,Ha=I(()=>H(new to(e=>{for(let t of e)ro.next(t)}))).pipe(x(e=>L(Te,H(e)).pipe(C(()=>e.disconnect()))),J(1));function de(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ge(e){return 
Ha.pipe(S(t=>t.observe(e)),x(t=>ro.pipe(_(({target:r})=>r===e),C(()=>t.unobserve(e)),l(()=>de(e)))),N(de(e)))}function bt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function ar(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}var no=new E,Pa=I(()=>H(new IntersectionObserver(e=>{for(let t of e)no.next(t)},{threshold:0}))).pipe(x(e=>L(Te,H(e)).pipe(C(()=>e.disconnect()))),J(1));function sr(e){return Pa.pipe(S(t=>t.observe(e)),x(t=>no.pipe(_(({target:r})=>r===e),C(()=>t.unobserve(e)),l(({isIntersecting:r})=>r))))}function oo(e,t=16){return dt(e).pipe(l(({y:r})=>{let n=de(e),o=bt(e);return r>=o.height-n.height-t}),Y())}var cr={drawer:V("[data-md-toggle=drawer]"),search:V("[data-md-toggle=search]")};function io(e){return cr[e].checked}function qe(e,t){cr[e].checked!==t&&cr[e].click()}function je(e){let t=cr[e];return b(t,"change").pipe(l(()=>t.checked),N(t.checked))}function $a(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function Ia(){return L(b(window,"compositionstart").pipe(l(()=>!0)),b(window,"compositionend").pipe(l(()=>!1))).pipe(N(!1))}function ao(){let e=b(window,"keydown").pipe(_(t=>!(t.metaKey||t.ctrlKey)),l(t=>({mode:io("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),_(({mode:t,type:r})=>{if(t==="global"){let n=_e();if(typeof n!="undefined")return!$a(n,r)}return!0}),fe());return Ia().pipe(x(t=>t?R:e))}function Me(){return new URL(location.href)}function ot(e){location.href=e.href}function so(){return new E}function co(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)co(e,r)}function M(e,t,...r){let n=document.createElement(e);if(t)for(let o of Object.keys(t))typeof 
t[o]!="undefined"&&(typeof t[o]!="boolean"?n.setAttribute(o,t[o]):n.setAttribute(o,""));for(let o of r)co(n,o);return n}function fr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function fo(){return location.hash.substring(1)}function uo(e){let t=M("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Fa(){return b(window,"hashchange").pipe(l(fo),N(fo()),_(e=>e.length>0),J(1))}function po(){return Fa().pipe(l(e=>se(`[id="${e}"]`)),_(e=>typeof e!="undefined"))}function Nr(e){let t=matchMedia(e);return Zt(r=>t.addListener(()=>r(t.matches))).pipe(N(t.matches))}function lo(){let e=matchMedia("print");return L(b(window,"beforeprint").pipe(l(()=>!0)),b(window,"afterprint").pipe(l(()=>!1))).pipe(N(e.matches))}function qr(e,t){return e.pipe(x(r=>r?t():R))}function ur(e,t={credentials:"same-origin"}){return ve(fetch(`${e}`,t)).pipe(ce(()=>R),x(r=>r.status!==200?Tt(()=>new Error(r.statusText)):H(r)))}function Ue(e,t){return ur(e,t).pipe(x(r=>r.json()),J(1))}function mo(e,t){let r=new DOMParser;return ur(e,t).pipe(x(n=>n.text()),l(n=>r.parseFromString(n,"text/xml")),J(1))}function pr(e){let t=M("script",{src:e});return I(()=>(document.head.appendChild(t),L(b(t,"load"),b(t,"error").pipe(x(()=>Tt(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(l(()=>{}),C(()=>document.head.removeChild(t)),Oe(1))))}function ho(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function bo(){return L(b(window,"scroll",{passive:!0}),b(window,"resize",{passive:!0})).pipe(l(ho),N(ho()))}function vo(){return{width:innerWidth,height:innerHeight}}function go(){return b(window,"resize",{passive:!0}).pipe(l(vo),N(vo()))}function yo(){return Q([bo(),go()]).pipe(l(([e,t])=>({offset:e,size:t})),J(1))}function lr(e,{viewport$:t,header$:r}){let n=t.pipe(X("size")),o=Q([n,r]).pipe(l(()=>Be(e)));return Q([r,t,o]).pipe(l(([{height:i},{offset:a,size:s},{x:f,y:c}])=>({offset:{x:a.x-f,y:a.y-c+i},size:s})))}(()=>{function 
e(n,o){parent.postMessage(n,o||"*")}function t(...n){return n.reduce((o,i)=>o.then(()=>new Promise(a=>{let s=document.createElement("script");s.src=i,s.onload=a,document.body.appendChild(s)})),Promise.resolve())}var r=class{constructor(n){this.url=n,this.onerror=null,this.onmessage=null,this.onmessageerror=null,this.m=a=>{a.source===this.w&&(a.stopImmediatePropagation(),this.dispatchEvent(new MessageEvent("message",{data:a.data})),this.onmessage&&this.onmessage(a))},this.e=(a,s,f,c,u)=>{if(s===this.url.toString()){let p=new ErrorEvent("error",{message:a,filename:s,lineno:f,colno:c,error:u});this.dispatchEvent(p),this.onerror&&this.onerror(p)}};let o=new EventTarget;this.addEventListener=o.addEventListener.bind(o),this.removeEventListener=o.removeEventListener.bind(o),this.dispatchEvent=o.dispatchEvent.bind(o);let i=document.createElement("iframe");i.width=i.height=i.frameBorder="0",document.body.appendChild(this.iframe=i),this.w.document.open(),this.w.document.write(`

Python programming for astronomy

The dockerfile defines a python env with astropy, matplotlib, numpy, scipy...

To build and run it:

docker build -t quay.io/jbcodeforce/mypythonplay .
+docker run -ti quay.io/jbcodeforce/mypythonplay bash
+

Pulsars

The goal is to answer: how many pulsars are detected in images taken with the Murchison Widefield Array telescope? The array telescope detects radio emission at frequencies between 80 and 300 megahertz. It has a very large field of view, which means it's great for doing large survey projects. Images use a grayscale to represent the flux density of emission from astronomical objects. Black is high flux density and gray is the background noise. Radio frequencies don't have color. These color maps are just used to accentuate different aspects of the intensity scale.

In radio astronomy, flux density is measured in units of Janskys, which is equivalent to 10 to the power of -26 watts per square meter per hertz.

)

Astronomy images are usually stored in a file format called FITS, and to view them you can download software like DS9 or use an online tool like Aladin.

We typically call something a detection if the flux density is more than five standard deviations higher than the noise in the local region.

To search for non-detections, a special approach is used called Stacking, which measures the statistical properties of a population we can't detect. Stacking works because the noise in a radio image is roughly random, with a Gaussian distribution centered on zero. When you add regions of an image that just have noise, the random numbers cancel out. But when you add regions of an image in which there are signals, the signals add together, increasing what we call the signal to noise ratio.

Introduction to Pulsars (from CSIRO) Hobbs, M. (n.d.) from http://www.atnf.csiro.au/outreach/education/everyone/pulsars/index.html

Pulsar Properties (from NRAO, advanced) National Radio Astronomy Observatory. (2010) from http://www.cv.nrao.edu/course/astr534/Pulsars.html

Calculating the mean / median stack of a set of FITS images

In Flexible Image Transport System (FITS) the image is stored in a numerical array, which we can load into a NumPy array. Opening a FITS file in astropy returns a HDU (Header/Data Unit) list. Each HDU stores headers and (optionally) image data. Here is a program to find the point in the image with the maximum intensity:

from astropy.io import fits
+def search_brightest_pixel(fname):
+  hdulist = fits.open(fname)
+  data = hdulist[0].data
+  nb_row,nb_col = data.shape
+  max = 0
+  x , y = (0,0)
+  for r in range(0,nb_row):
+    for c in range(0,nb_col):
+      if data[r][c] > max :
+          x = r
+          y = c
+          max = data[r][c]
+  return x,y
+

A better approach is to use the median (the middle of the sorted data set), as the mean is easily skewed by outliers. But computing the median can be computationally intensive and consume a lot of memory, as calculating the median requires all the data to be in memory at once. This is an issue in typical astrophysics calculations, which may use hundreds of thousands of FITS files. To compute the median we can use the statistics library, or the following approach:

fluxes = [17.3, 70.1, 22.3, 16.2, 20.7]
+fluxes.sort()
+mid = len(fluxes)//2
+median = fluxes[mid]
+# or for an even number of elements
+median = (fluxes[mid - 1] + fluxes[mid])/2
+

or using numpy:

data = load_stack(fnames)
+stack = np.dstack(data)
+median = np.median(stack, axis=2)
+

To avoid loading all the data in memory, we can use the binapprox algorithm to approximate the current median. The idea behind it is to find the median from the data's histogram. Starting from the left, if we sum up the counts in the histogram bins until we get to just over the expected median, then we know the last bin we added must have contained the median. In fact it is better to search in bins within one standard deviation of the mean. See Stacking/binapprox.py code.

AGN: Active Galactic Nucleus

Our eyes can only detect light and the visible part of the electromagnetic spectrum. Galaxy has Xray, visible and radio waves. At the center of the galaxy is a black hole, which has a huge impact on the galaxy's growth and formation. In cases where there is a lot of gas in the central region of the galaxy, this material can be accreted on to the black hole via an Accretion Disk, releasing a lot of energy in the process. This is what we call, an Active Galactic Nucleus. The radiation produced by the AGN is so bright that it can outshine the entire galaxy, producing far more energy than all of the galaxy's stars combined. It may form huge jets of strong magnetic fields emanating out from around the black hole. Here is an image of combined wave length from visible, X-ray and radio

Galaxy in 3 wave lengths

The material that accretes onto a black hole produces X-rays, because the particles become very hot. We can assess the presence of a supermassive black hole by measuring powerful jets coming from a compact core, rapid changes in the luminosity of the galaxy nucleus, and very high speed orbital motions of stars in the galactic nucleus.

Cross-matching

When investigating astronomical objects, like active galactic nuclei (AGN), astronomers compare data about those objects from different telescopes at different wavelengths. This requires positional cross-matching to find the closest counterpart within a given radius on the sky.

To create a catalog of objects from survey images, the source-finding software uses the same technique of going through all the pixels and finding peaks that are statistically significant.

How to calculate distance in the sky? Two objects in the same image are not in the same plane; we can compute the angular distance, but they may be far apart along the line of sight.

The cross matching between 2 catalogs: The BSS catalogue lists the brightest sources from the AT20G radio survey while the SuperCOSMOS catalogue lists galaxies observed by visible light surveys.

The positions of stars, galaxies and other astronomical objects are usually recorded in either equatorial or Galactic coordinates.

  • Right ascension: the angle from the vernal equinox to the point, going east along the celestial equator. Given in hours-minutes-seconds (HMS). 1 hour = 15 degrees
  • Declination: the angle from the celestial equator to the point, going north (negative values indicate going south). Recorded in degrees-minutes-seconds (DMS) notation. A full circle is 360 degrees, each degree has 60 arcminutes and each arcminute has 60 arcseconds.

The vernal equinox is the intersection of the celestial equator and the ecliptic where the ecliptic rises above the celestial equator going further east.

To crossmatch two catalogues we need to compare the angular distance between objects on the celestial sphere, which is the projected angle between objects as seen from Earth.

See the cross-matching.py code for in-place comments and study. But this program is O(n*m); there is an Astropy library with cross-matching, using a k-d tree, as demonstrated in this code.

Statistic / data science helps Astronomy

Data cannot directly answer what you want to find, so we can use probability theory to assess whether the data provides an answer. The approach is to assert a hypothesis and derive what kind of data we should expect to see. Then you use the model-fitting approach by selecting the hypothesis that best fits the data and throwing away the ones that don't fit the data. 2016 set a record for the biggest haul of exoplanets, when the Kepler team applied statistical validation to verify over a thousand new planets.

Exoplanets

The science of exoplanets kicked off back in the late 1990s, the success of the space telescopes CoRoT and Kepler has really accelerated the field. Back in the 90s, we were discovering one or two planets a year, on average.

Thanks to Kepler, hundreds of new planets are being confirmed every year, with thousands more candidates being found.

The most common planets are the super earth. The NASA public catalog. Here are some of the helpful attributes

Attribute Description
Kepler ID Unique target identification number for stars
KOI name String identifier for the Kepler Object of Interest (KOI)
Teff (K) Effective temperature of a star in Kelvin
Radius Radius of stars and planets in units of solar radius/earth radius respectively
Kepler name Unique string identifier for a confirmed exoplanet in the planet table
Period Orbital period of a planet in units of days
Status Status of a discovered KOI in the planet table, e.g. "confirmed" or "false positive"
Teq Equilibrium temperature of a planet in Kelvin

Some interesting queries:

SELECT koi_name, radius FROM Planet ORDER BY radius DESC LIMIT 5;
+# analyse the size of the unconfirmed exoplanets (kepler_name is null).
+SELECT MIN(radius), MAX(radius), AVG(radius), STDDEV(radius) FROM Planet where kepler_name is NULL;
+# how many planets in the Planet database are in a multi-planet system
+select kepler_id, count(koi_name) from Planet group by kepler_id having count(koi_name) > 1 order by count(koi_name) desc;
+

Which Earth-sized planets are in the habitable zone of their host star?

To work out which planets are in the habitable zone, we'll consider the energy budget of a planet: how much energy it receives from its star versus how much it radiates back into space. The intensity of the energy decreases the further the planet is from its star. The incoming energy budget of the planet clearly depends on the brightness of its star, and how close the planet is to that star.

The insolation flux for Earth is 1361 W/m²

\ No newline at end of file diff --git a/aws/index.html b/aws/index.html new file mode 100644 index 0000000..5bba90d --- /dev/null +++ b/aws/index.html @@ -0,0 +1,39 @@ + AWS - Python code studies

AWS

boto3 library

A unique library to access all AWS services from a python app.

Installation

pip install boto3[crt]
+

Set up authentication credentials for your AWS account using either the IAM Console or the AWS CLI.

aws configure
+# Verify access
+aws iam list-users
+

Info

The jbcodeforce/python docker image has the aws cli and boto3.

Programming samples

Access S3

import boto3
+# declare a client to the service you want
+s3 = boto3.resource("s3")
+# use SDK API for s3.
+s3.buckets.all()
+

Access DynamoDB

The client can get the table name using the API client:

import os, boto3
+
+AWS_ACCESS_KEY_ID=os.environ.get("AWS_ACCESS_KEY_ID")
+AWS_SECRET_ACCESS_KEY=os.environ.get("AWS_SECRET_ACCESS_KEY")
+
+client = boto3.client(
+    'dynamodb',
+    aws_access_key_id=AWS_ACCESS_KEY_ID,
+    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+    )
+table = client.list_tables()
+tableName=table['TableNames'][0]
+

Then use the dynamoDB API:

orderTable = dynamodb.Table(tableName)
+
+dynamodb = boto3.resource(
+    'dynamodb',
+    aws_access_key_id=AWS_ACCESS_KEY_ID,
+    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+    )
+
+orderTable = dynamodb.Table(tableName)
+orderTable.put_item(
+   Item={
+     "orderID": "ORD001",
+     "customerID": "C01", 
+     "productID": "P01", 
+     "quantity": 10,  
+     "destinationAddress": { "street": "1st main street", "city": "Santa Clara", "country": "USA", "state": "CA", "zipcode": "95051" }
+   })
+
  • Run it once the python virtual env is enabled with python dynamoClient.py

CDK with python

Separate note in AWS_Studies.

\ No newline at end of file diff --git a/dev-env/index.html b/dev-env/index.html new file mode 100644 index 0000000..01d4f10 --- /dev/null +++ b/dev-env/index.html @@ -0,0 +1,32 @@ + Development environment - Python code studies

Development environments

Apple Mac OS uses python for its own operations, so it is very important to isolate the development environment from the operation of the OS to avoid compromising the integrity of the whole system. So virtualenv can be used, but in today's world, docker and pipenv are the way to go as they:

  • avoid installing software and libraries not often used on the native OS
  • describe the dependencies on library so programs developed 5 years ago should still run
  • easy to switch laptop
  • quicker provisioning than a VM running with Vagrant, still offering mounting host folder, running under localhost
  • use docker compose for each project to manage component dependencies
  • if needed pipenv can be used to set up virtual environment to isolate dependencies

Clone this project

git clone https://github.com/jbcodeforce/python-code
+cd python-code
+

Use the different docker images

The DockerfileForEnv in the current project defines an image for running python 3.7 with Flask, pytest, pandas and other basic libraries.

To build the image you need docker engine and do the following

docker build -t jbcodeforce/python37 . 
+

Then start the image as container with the command below, which also mount your local folder to the /home folder inside the docker container:

docker run -e DISPLAY=192.168.1.89:0 --name pythonenv -v $(pwd):/home/ -it --rm -p 5000:5000 jbcodeforce/python37 bash
+

Note

The script named startPythonDocker.sh performs this command.

The docker image includes pipenv for improving the dependency management.

The other Dockerfile for astrophysic is Here

Use pipenv

Pipenv offers the latest best practices from other languages to manage virtual environments and dependencies for Python. Adding and removing packages also updates the dependencies descriptor file: Pipfile. It basically combines pip and virtualenv. It helps address the build inconsistency that requirements.txt brings.

To install it on the mac:

brew install pipenv
+

When using the docker image you do not need to install pipenv on the host. It is also available in the docker image so the following commands should work from the bash inside the docker container.

# Create a new project using Python 3.7
+pipenv --python 3.7
+# start the virtual env shell
+pipenv shell
+# or start a python interpreter
+pipenv run python
+# or run a program with python interpreter
+pipenv run python FaustEasiestApp.py
+# install dependencies including dev
+pipenv install --dev
+#Check your installed dependencies for security vulnerabilities:
+pipenv check
+# print a pretty graph of all your installed dependencies.
+pipenv graph 
+# Uninstalls all packages not specified in Pipfile.lock.
+pipenv clean
+# lock dependencies
+pipenv lock
+

Run the python interpreter

Start python in the container shell:

root@485cff8245df:/home#  python
+Python 3.7.4 (default, Jul  9 2019, 00:06:43) 
+[GCC 6.3.0 20170516] on linux
+>>> 
+

Use exit() to get out of the python interpreter, and Ctrl D for getting out of the Docker container.

Using graphics inside the python container

The approach is to run graphics program inside python interpreter, but the windows will appear on the host machine (the Mac). To do so we need a bidirectional communication between the docker container and the Mac. This is supported by the socat tool. To install it the first time do the following:

brew install socat
+

When installed, open a new terminal and start socat with the command:

socat TCP-LISTEN:6000,reuseaddr,fork UNIX-CLIENT:\"$DISPLAY\"
+

As the container is running X window system, we need to also install a X system on Mac. This is done via the Xquartz program:

brew install xquartz
+

Then start Xquartz from the application or using

open -a Xquartz
+

A white terminal window will pop up. The first time Xquartz is started, open up the preferences menu and go to the security tab. Then select “allow connections from network clients” to check it on.

See this note from Nils De Moor for more information.

\ No newline at end of file diff --git a/extra.css b/extra.css new file mode 100644 index 0000000..0b82e31 --- /dev/null +++ b/extra.css @@ -0,0 +1,3 @@ +.section ul li { + margin-bottom: 10px; +} diff --git a/flask/flask-tdd-docker/index.html b/flask/flask-tdd-docker/index.html new file mode 100644 index 0000000..a1ed7ab --- /dev/null +++ b/flask/flask-tdd-docker/index.html @@ -0,0 +1,146 @@ + Flask Rest microservice with TDD and Docker - Python code studies

Flask microservice with TDD and docker

This content is based on the tutorial from testdriven.io and covers:

The folder Flask/flask-tdd-docker includes the training code.

Set virtual env

The old way to define virtual environment was to use the following approach:

python3.7 -m venv env
+source env/bin/activate
+# play with python ...
+# Stop with:
+deactivate
+

As of today, the approach is to use pipenv, where you update the project and development dependencies in a Pipfile.

pipenv --python 3.7
+# start the virtual env
+pipenv shell
+pipenv install --dev
+

Freeze the dependencies:

pipenv lock -r > requirements.txt
+

Define and run the flask app

Define a manage.py to represent the app, and use the Flask CLI shell to manage the app from command line:

from flask.cli import FlaskGroup
+from project import app
+
+
+cli = FlaskGroup(app)
+
+
+if __name__ == '__main__':
+    cli()
+
export FLASK_APP=project/__init__.py
+# use the Flask CLI from inside the app itself
+python manage.py run
+

Run in development mode for debugging.

export FLASK_ENV=development
+python manage.py run
+* Serving Flask app "project/__init__.py" (lazy loading)
+* Environment: development
+* Debug mode: on
+

With the Flask shell we can explore the data in the application:

flask shell
+

Using docker and docker compose

The dockerfile uses alpine linux and non root user. The docker compose uses volume to mount the code into the container. This is a must for a development environment in order to update the container whenever a change to the source code is made. Then build the image using docker compose.

docker-compose build
+# then start in detached mode
+docker-compose up -d
+# Rebuild the docker images 
+docker-compose up -d --build
+# Access app logs
+docker-compose logs
+# Access to a python shell to control the flask app
+docker-compose exec users flask shell
+

Add persistence on Postgresql and use SQLAlchemy

To initialize the postgresql copy a sql file under /docker-entrypoint-initdb.d (creating the directory if necessary).

docker compose section for postgresql:

users-db:  
+    build:
+      context: ./project/db
+      dockerfile: Dockerfile
+    expose:
+      - 5432
+    environment:
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=postgres
+

Once spun up, PostgreSQL will be available on port 5432. Be sure to include dependencies in the app dockerfile

# install dependencies
+RUN apk update && \
+    apk add --virtual build-deps gcc python-dev musl-dev && \
+    apk add postgresql-dev && \
+    apk add netcat-openbsd && \
+    pip install --upgrade pip && \
+    pip install --upgrade --user pipenv 
+

Also, to avoid having the application get errors because it could not contact the database, add an entrypoint.sh shell script to loop until the database is accessible before starting the python app.

To access psql, use the following docker compose command

docker-compose exec users-db psql -U postgres
+
+psql (11.4)
+Type "help" for help.
+
+postgres=# \c users_dev
+You are now connected to database "users_dev" as user "postgres".
+users_dev=# \dt
+Did not find any relations.
+users_dev=# \q
+

In the manage.py file, register a new flask CLI command, recreate_db, so that we can run it from the command line like:

docker-compose exec users python manage.py recreate_db
+
# @cli.command('recreate_db')
+def recreate_db():
+    db.drop_all()
+    db.create_all()
+    db.session.commit()
+

Add tests with pytest

While unittest requires test classes, Pytest just requires functions to get up and running.

Define fixtures as reusable elements for future tests

They have a scope associated with them, which indicates how often the fixture is invoked:

  • function - once per test function
  • class - once per test class
  • module - once per test module
  • session - once per test session

Some fixture execution guidance

Define python script using 'test_' or '_test.py'. Here is an example of functional testing:

def test_ping(test_app):
+    client = test_app.test_client()
+    resp = client.get('ping')
+    data = json.loads(resp.data.decode())
+    assert resp.status_code == 200
+    assert 'pong' in data['message']
+    assert 'success' in data['status']
+

Execute test with pytest: pytest project/tests/ or with docker compose

docker-compose exec users pytest "project/tests"
+

Test coverage

Coverage.py is a popular tool for measuring code coverage in Python-based applications. Now, since we're using Pytest, we'll integrate Coverage.py with Pytest using pytest-cov. In Pipfile add pytest-cov = ">=2.7.1" then do pipenv install

Then once the image is rebuilt, run the following command to assess the test coverage:

docker-compose exec users pytest "project/tests" -p no:warnings --cov="project"
+# or using html page
+docker-compose exec users pytest "project/tests" -p no:warnings --cov="project" --cov-report html
+

Remember: just because you have 100% test coverage doesn’t mean you're testing the right things

Code quality

Linting is the process of checking your code for stylistic or programming errors. Although there are a number of commonly used linters for Python, we'll use Flake8 since it combines two other popular linters -- pep8 and pyflakes.

In Pipfile add flake8 = ">=3.7.8", do a pipenv install then freeze the dependencies with pipenv lock -r > requirements.txt, then rebuild the docker image and run flake8:

 docker-compose exec users flake8 project
+

Black helps to format code and apply code formatting:

# check
+docker-compose exec users black project --check
+# see the propose changes
+docker-compose exec users black project --diff
+# apply the change
+docker-compose exec users black project
+

Add Blueprints template

Blueprints are self-contained components, used for encapsulating code, templates, and static files. They are apps within the app. For example REST resource can be defined in Blueprint.

For example to add an api and a resource, define a new py file, and create a blueprint instance:

users_blueprint = Blueprint('users', __name__)
+api = Api(users_blueprint)
+

Then define a class with functions to support the expected Resource, and add this class to a url to the api.

class UsersList(Resource):
+    def get(self):
+        ...
+    def post(self):
+        ...
+api.add_resource(UsersList, '/users')
+

Finally register the resource to the flask application:

    from project.api.users import users_blueprint
+    app.register_blueprint(users_blueprint)
+

See the code in users.py and init.py

Factory to create an app needs to be named create_app.

Adding admin and model view

Production deployment with gunicorn

Create a specific Dockerfile.prod and set the environment variable to run Flask in production mode and use gunicorn as container, and run under a user that is not root.

ENV FLASK_ENV production
+ENV APP_SETTINGS project.config.ProductionConfig
+
+# add and run as non-root user
+RUN adduser -D myuser
+USER myuser
+
+# run gunicorn
+CMD gunicorn --bind 0.0.0.0:$PORT manage:app
+

Heroku

Using heroku CLI.

$ heroku login
+# create a app
+$ heroku create 
+Creating app... done, murmuring-shore-37331
+https://murmuring-shore-37331.herokuapp.com/ | https://git.heroku.com/murmuring-shore-37331.git
+
+# login to docker private registry
+$ heroku container:login
+
+# create a postgresql with the hobby-dev plan
+$ heroku addons:create heroku-postgresql:hobby-dev --app murmuring-shore-37331
+
+Creating heroku-postgresql:hobby-dev on murmuring-shore-37331... free
+Database has been created and is available
+ ! This database is empty. If upgrading, you can transfer
+ ! data from another database with pg:copy
+Created postgresql-horizontal-04149 as DATABASE_URL
+Use heroku addons:docs heroku-postgresql to view documentation
+
+# Get database URL
+heroku config:get DATABASE_URL --app murmuring-shore-37331
+

The containers used at Heroku are called “dynos.” Dynos are isolated, virtualized Linux containers that are designed to execute code based on a user-specified command.

To build an image for the docker private registry, using the web dyno, that is free.

$ docker build -f Dockerfile.prod -t registry.heroku.com/murmuring-shore-37331/web .
+# publish
+$ docker push registry.heroku.com/murmuring-shore-37331/web:latest
+# test locally
+$ docker run --name flask-tdd -e "PORT=8765" -p 5002:8765 registry.heroku.com/murmuring-shore-37331/web:latest
+
+# Release the image, meaning the app will be based on the container image
+$ heroku container:release web --app murmuring-shore-37331
+Releasing images web to murmuring-shore-37331... done
+

Once the image is "released", the app is accessible via https://murmuring-shore-37331.herokuapp.com/ping

Access to logs: heroku logs --app murmuring-shore-37331

The users are not yet created, so we can run the CLI heroku run:

# create DB
+heroku run python manage.py recreate_db --app murmuring-shore-37331
+# populate the data
+heroku run python manage.py seed_db --app murmuring-shore-37331
+# Access the database with psql: 
+# 1. start a local docker postgresql with psql
+docker run -ti postgresql bash
+> psql postgres://....
+> PSQL:
+
\ No newline at end of file diff --git a/flask/readme/index.html b/flask/readme/index.html new file mode 100644 index 0000000..94f7e46 --- /dev/null +++ b/flask/readme/index.html @@ -0,0 +1,59 @@ + Flask studies and basic samples - Python code studies

Python Flask Studies

The most complete starter code is from the Flask TDD tutorial and using docker. But I have incremental apps, to make it simpler to develop an app from scratch.

Some concepts

Flask app takes care of dispatching requests to views and routes.

Samples

To use a boiler plate code with Flask, Blueprint, Swagger, Prometheus see the boiler plate folder.

The simplest Flask app

The simplest Flask app is presented in the quickstart and the matching code is under Flask/firstApp/firstApp.py. To execute it in your python environment:

cd Flask/firstApp
+# start docker image for dev environment
+docker run -ti -v $(pwd):/app -p 5000:5000 jbcodeforce/python37 bash
+# Can run it with python - it will start in debug mode
+python firstApp.py
+# Or run it with flask CLI
+export FLASK_APP=firstApp.py
+flask run --host=0.0.0.0
+ * Serving Flask app "firstApp"
+ * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)
+127.0.0.1 - - [17/Dec/2018 20:49:42] "GET / HTTP/1.1" 200 -
+

The FLASK_APP environment variable is the name of the module to import at flask run.

To make the server publicly available simply by adding --host=0.0.0.0 to the command: flask run --host=0.0.0.0

If we want to run it in debug mode then any change to the code reload itself. To do so use:

export FLASK_ENV=development
+flask run --host=0.0.0.0
+

Next, is to use gunicorn to run it on top of a wsgi server so in the docker container add:

gunicorn -w 4 -b 0.0.0.0:5000 firstApp:app
+

Which is the command in the dockerfile under the firstApp folder:

 docker build -t jbcodeforce/firstApp .
+

Start the image with

docker run --name firstApp --rm -p 5000:5000 jbcodeforce/firstApp
+

Serving static pages

Add a folder named static at the same level as app to start. The staticApp.py demonstrates the routing specified and the api to send the file.

from flask import Flask
+app = Flask(__name__)
+
+
+@app.route('/')
+def root():
+    return app.send_static_file('404.html')
+
+
+if __name__ == "__main__":
+    app.run(debug=True,host='0.0.0.0')
+

and the execution:

export FLASK_APP=staticApp.py
+flask run
+ * Serving Flask app "staticApp"
+ * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
+127.0.0.1 - - [17/Dec/2018 21:29:00] "GET / HTTP/1.1" 200 -
+

A REST api

The route decorator is used to bind function to a URL. You can add variables and converter. The firstRESTApp.py illustrates the different patterns. The important modules to import are:

from flask import Flask, url_for, request, json
+

Accessing the HTTP headers is done using the request.headers dictionary ("dictionary-like object") and the request data using the request.data string.

A second nice module is Flask Restful. We can declare Resource class and use the API to link the resource to an URL.

The following code illustrates the resource class, with an argument passed at the constructor level to inject it into the resource. In this case this is a Kafka consumer which includes a map of the message read. The class is using the Blueprint module to simplify the management of resource:

# code of the resource.py
+from flask_restful import Resource, Api
+from flask import Blueprint
+
+data_inventory_blueprint = Blueprint("data_inventory", __name__)
+inventoryApi = Api(data_inventory_blueprint)
+
+class DataInventory(Resource):  
+
+    def __init__(self, consumer):
+        self.consumer = consumer
+
+    # Returns the Inventory data in JSON format
+    @track_requests
+    @swag_from('data_inventory.yml')
+    def get(self):
+        logging.debug('[DataInventoryResource] - calling /api/v1/data/inventory endpoint')
+        return self.consumer.getAllLotInventory(),200, {'Content-Type' : 'application/json'}
+

The app.py that uses this resource accesses the API and the add_resource method, to define the resource class, the URL and then any arguments to pass to the resource constructor.

from server.api.inventoryResource import data_inventory_blueprint, inventoryApi, DataInventory
+
+
+app = Flask(__name__)
+
+inventory_consumer = InventoryConsumer()
+inventoryApi.add_resource(DataInventory, "/api/v1/data/inventory",resource_class_kwargs={'consumer':inventory_consumer})
+

Flask REST API article

An Angular app

See this repository to a more complete example of angular development and Flask.

Flask TDD Docker

See this dedicated note

Flask with DynamoDB, ECR, Fargate

See Code and Readme

Flask Blueprint

Helps to structure the application in reusable components. To use a Flask Blueprint, you have to import it and then register it in the application using register_blueprint(). A blueprint is an object that works like a Flask app too. See the boilerplate example.

\ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..d6ae847 --- /dev/null +++ b/index.html @@ -0,0 +1 @@ + Python code studies

Python Studies

This repository groups a bunch of Python code samples from my own self-training and studies for web development, crawling, Python best practices, and Raspberry Pi work.

Language Advantages / disadvantages

Advantages:

  • Interpreted with shell to start quickly, more concise language
  • 2nd most used programming language
  • A lot of libraries, used a lot by data scientists
  • Combines functional and OOP.
  • Raspberry PI language of choice

Disadvantages:

  • Slow, not supporting well multi cpu / threading architecture
  • Not great for mobile and 3D game programming

Code in the order of knowledge acquisition

Basics

Flask

Algorithms

Graphics

Web scraping

Use urllib and Beautiful Soup to remove HTML tags from a web page to get text to parse. See this note for guidance

Astronomy

See detailed note here and code is under astronomy folder.

AWS

To get some sample code to use AWS SDK see this folder.

Some tricks

  • placing the cursor on the previous line and pressing Enter will copy the line to a new line
\ No newline at end of file diff --git a/pandas/index.html b/pandas/index.html new file mode 100644 index 0000000..2ff4649 --- /dev/null +++ b/pandas/index.html @@ -0,0 +1,94 @@ + Pandas - Python code studies

Pandas summary

See the Kaggle quick tutorial on pandas.

Pandas has two core objects: the DataFrame and the Series.

DataFrame is like a table which contains an array of individual entries, each of which has a certain value. Each entry corresponds to a row (or record) and a column.

Series is a sequence of data values, it may be a single column of a DataFrame.

Here is a quick summary of some of the tutorial content:

# use pandas
+import pandas as pd
+# Create a data frame from a dictionary whose keys are the column names and values are list of entries
+pd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], 'Sue': ['Pretty good.', 'Bland.']})
+# with row header as index
+pd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], 
+              'Sue': ['Pretty good.', 'Bland.']},
+             index=['Product A', 'Product B'])
+# read from file
+home_data = pd.read_csv(a_file_path,index_col=0)
+# get row / columns size
+home_data.shape
+home_data.head()
+# Series
+pd.Series(["4 cups", "1 cup", "2 large", "1 can"],index=["Flour", "Milk", "Eggs", "Spam"],name="Dinner")
+

Indexing

Pandas uses two approaches:

  • index-based selection: To select first row of a data frame: reviews.iloc[0]. To get a column with iloc use: reviews.iloc[:, 0]. We can select row too, like getting the last five elements of the dataset: reviews.iloc[-5:]
  • label-based selection. gets from data index value, not its position: reviews.loc[0, 'country']. Or select three columns and all rows: reviews.loc[:, ['taster_name', 'taster_twitter_handle', 'points']]

Label-based selection derives its power from the labels in the index. We can set index with:

reviews.set_index("title")
+
+# Select elements that match condition 
+reviews.loc[reviews.country == 'Italy']
+# or within a list
+reviews.loc[reviews.country.isin(['Italy', 'France'])]
+# not empty cell
+reviews.loc[reviews.price.notnull()]
+# get specific rows
+sample_reviews = reviews.iloc[[1,2,3,5,8],:]
+# Combining conditions
+top_oceania_wines = reviews.loc[reviews.country.isin(['New Zealand','Australia']) & (reviews.points >= 95)]
+
# Get high-level summary of the attributes of the given column
+reviews.points.describe()
+# Get unique elements in a column
+reviews.taster_name.unique()
+# To see a list of unique values and how often they occur in the dataset
+reviews.taster_name.value_counts()
+# Ex bargain_wine with the title of the wine with the highest points-to-price ratio in the dataset.
+bargain_idx = (reviews.points / reviews.price).idxmax()
+bargain_wine = reviews.loc[bargain_idx, 'title']
+

map

map() takes one set of values and "maps" them to another set of values. The function passed to map() should expect a single value from the Series and return a transformed version of that value.

review_points_mean = reviews.points.mean()
+reviews.points.map(lambda p: p - review_points_mean)
+# build a series to count how many times each of tropical, fruity words appears in the description column in the dataset.
+topic= reviews.description.map(lambda d: "tropical" in d).sum()
+fruit= reviews.description.map(lambda d: "fruity" in d).sum()
+descriptor_counts = pd.Series([topic,fruit], index=['tropical', 'fruity'])
+

apply() transforms a whole DataFrame by calling a custom method on each row (or each column, depending on the axis argument).

Both methods don't modify the original data they're called on.

Grouping

Use groupby() to group our data, and then do something specific to the group the data is in:

# Group of reviews which allotted the same point values to the given wines. 
+# Then, for each of these groups, grab the `points` column and count how many times it appeared
+reviews.groupby('points').points.count()
+# Apply transformation to the new dataframe
+reviews.groupby('winery').apply(lambda df: df.title.iloc[0])
+

agg() lets us run a bunch of different functions on our DataFrame simultaneously

reviews.groupby(['country']).price.agg([len, min, max])
+

groupby combined with some specific operations may create multi-indexes, which looks like a tiered structure.

countries_reviewed = reviews.groupby(['country', 'province']).description.agg([len])
+# sort by ascending
+countries_reviewed.sort_values(by='len', ascending=False)
+# To sort by index values, use the companion method sort_index()
+# sort by more than one column
+countries_reviewed.sort_values(by=['country', 'len'])
+# most wine reviewer, using their twitter name
+reviews.groupby(['taster_twitter_handle']).taster_twitter_handle.count()
+# What is the best wine I can buy for a given amount of money? 
+best_rating_per_price = reviews.groupby('price').points.max().sort_index()
+# What are the minimum and maximum prices for each variety of wine? 
+price_extremes = reviews.groupby('variety').price.agg([min,max])
+# What are the most expensive wine varieties?
+sorted_varieties = reviews.groupby('variety').price.agg([min,max]).sort_values(by=['min','max'],ascending=False)
+# A series with index is reviewers and whose values is the average review score given out by that reviewer.
+reviewer_mean_ratings = reviews.groupby(['taster_name']).points.agg('mean')
+

Data types

# Get data type of the points column
+reviews.points.dtype
+# All column types
+reviews.dtypes
+# convert to another type, when it makes sense
+reviews.points.astype('float64')
+

Missing values

Entries missing values are given the value NaN, short for "Not a Number".

reviews[pd.isnull(reviews.country)]
+# How many reviews in the dataset are missing a price?
+missing_prices = reviews[reviews.price.isnull()]
+n_missing_prices = len(missing_prices)
+
+# What are the most common wine-producing regions? 
+# Series counting the number of times each value occurs in the region_1 field. 
+# This field is often missing data, so replace missing values with Unknown. 
+# Sort in descending order.
+reviews.region_1.fillna('Unknown').value_counts().sort_values(ascending=False)
+
+# Replace a value by another
+reviews.taster_twitter_handle.replace("@kerinokeefe", "@kerino")
+

Renaming

rename() lets you rename index or column values by specifying a index or column keyword parameter, respectively.

reviews.rename(columns={'points': 'score'})
+
+reviews.rename(index={0: 'firstEntry', 1: 'secondEntry'})
+
+# Rename index
+reindexed = reviews.rename_axis('wines',axis='rows')
+

Combining

Pandas has three core methods for doing combining data frames. In order of increasing complexity, these are concat(), join(), and merge().

join() lets you combine different DataFrame objects which have an index in common

left = canadian_youtube.set_index(['title', 'trending_date'])
+right = british_youtube.set_index(['title', 'trending_date'])
+
+left.join(right, lsuffix='_CAN', rsuffix='_UK')
+

The lsuffix and rsuffix parameters are necessary here because the data has the same column names in both British and Canadian datasets.

\ No newline at end of file diff --git a/python/compendium/index.html b/python/compendium/index.html new file mode 100644 index 0000000..b0fe2e7 --- /dev/null +++ b/python/compendium/index.html @@ -0,0 +1 @@ + Compendium - Python code studies
\ No newline at end of file diff --git a/python/faq/index.html b/python/faq/index.html new file mode 100644 index 0000000..07cab81 --- /dev/null +++ b/python/faq/index.html @@ -0,0 +1,124 @@ + FAQ - Python code studies

Python FAQ

Why pipenv

pipenv resolves the problem of dependency management, which is not perfectly handled by requirements.txt and leads to a non-deterministic build process. Given the same input (the requirements.txt file), pip doesn't always produce the same environment. pip freeze helps to freeze the dependencies and update your requirements.txt. But any dependency change needs to be done manually, and you need to track the dependent package versions for bug fixes or mandatory security fixes.

A second problem is the system-wide repository used by pip. When developing multiple different projects in parallel, that could become a real issue. pipenv uses a per-project environment. pipenv acts as pip + virtual environment. It uses Pipfile to replace requirements.txt and Pipfile.lock for deterministic builds. See this guide for command examples.

How to get program arguments?

  import sys,getopt
+  USER="jbcodeforce"
+  FILE="./data/export-questions.json"
+  try:
+    opts, args = getopt.getopt(argv,"hi:u:",["inputfile=","user="])
+  except getopt.GetoptError:
+    print(usage())
+    sys.exit(2)
+
+
+  for opt, arg in opts:
+    if opt == '-h':
+      usage()
+      sys.exit()
+    elif opt in ("-u", "--user"):
+      USER = arg
+    elif opt in ("-i", "--inputfile"):
+      FILE = arg
+

Using arg_parser

import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument("filename", help="Must specify a file name")
+parser.add_argument("--append", help="Append records to existing file",action="store_true")
+args = parser.parse_args()
+if args.append:
+    print("append to file")
+

what __init__.py under folder used for?

The __init__.py file makes Python treat directories containing it as modules. Furthermore, this is the first file to be loaded in a module, so you can use it to execute code that you want to run each time a module is loaded, or specify the submodules to be exported.

How to get program dependencies generated?

pip freeze > requirements.txt
+

Access environment variable

Define environment variables in a .env file, use os package:

import os
+
+AWS_ACCESS_KEY_ID=os.environ.get("AWS_ACCESS_KEY_ID")
+AWS_SECRET_ACCESS_KEY=os.environ.get("AWS_SECRET_ACCESS_KEY")
+

List content of folder

import glob
+
+def listOfYaml():
+    return glob.glob("./*.yaml")
+

Change content of yaml

import glob
+import yaml
+
+def listOfYaml():
+    return glob.glob("./*.yaml")
+
+def processYamlFile(f):
+    with open(f) as aYaml:
+        listDoc = yaml.safe_load(aYaml)
+    print(listDoc)
+    listDoc["metadata"]["namespace"]='std-2'
+    print(listDoc)
+
+
+f = listOfYaml()
+processYamlFile(f[0])
+

How to sort unit tests?

Use TestSuite and TestRunner. See TestPerceptron.py for usage.

import unittest
+
+class TestPerceptron(unittest.TestCase):
+  # ....
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(TestPerceptron('testLoadIrisData'))
+    suite.addTest(TestPerceptron('testPlotIrisData'))
+
+if __name__ == "__main__":
+    runner = unittest.TextTestRunner(failfast=True)
+    runner.run(suite())
+

How to traverse a directory hierarchy?

import os
+for root, dirs, files in os.walk("/mydir"):
+  for file in files:        
+    if file.endswith(".txt"):             
+      print(os.path.join(root, file))
+

How to select a random item from a list?

import random
+move=random.choice(possibleMoves)
+

Logger

import logging
+

Start python with the --log=INFO to set the logging level.

Reading Files

Read json file

g = open('critics.json','r')
+d = json.load(g)
+

Read csv file

f = open('fn.csv','r')
+for line in f:
+  record = line.split(',')
+
+# or with unicode:
+  changedLine=u''.join(line).encode('utf-8').strip()
+

Read file with specific encoding

 with open('../data/movielens/u.item',  encoding='ISO-8859-1') as f:
+

Skip the first row of a file

f = open('fn.csv','r')
+f.readline()
+for line in f:
+

How to get execution time

import time
+start = time.perf_counter()
+# potentially slow computation
+end = time.perf_counter() - start
+

Example of memory consumption for object

import sys
+
+a = 3
+b = 3.123
+c = [a, b]
+d = []
+for obj in [a, b, c, d]:
+  print(obj, sys.getsizeof(obj))
+

Using CloudEvent

attributes = {
+      "type": "com.anycompany.bdcp.user",
+      "source": "https://anycompany.com/user-mgr",
+}
+data = { "eventType": "UserLogin", "username": "bob.the.builder@superemail.com"}
+event = CloudEvent(attributes, data)
+print(event)
+

What is zip?

Returns an iterator of tuples, where the i-th tuple contains the i-th element from each of the argument sequences or iterable:

dataset
+[[1, 20, 0], [2, 21, 1], [3, 22, 0]]
+for a in zip(*dataset): print(a)
+(1, 2, 3)
+(20, 21, 22)
+(0, 1, 0)
+

How to use some math

# modulo
+8 % 2
+0
+#
+

What is a package?

A package is nothing more than a folder, which must contain a special file, __init__.py (not needed anymore with Python 3.3+).

What are namespace and scope?

A namespace is a mapping from names to objects. They are the built-in names, the global names in a module, and the local names in a function. A scope is a textual region of a Python program, where a namespace is directly accessible. There are four different scopes that Python makes accessible:

  • The local scope, which is the innermost one and contains the local names.
  • The enclosing scope, that is, the scope of any enclosing function. It contains non-local names and also non-global names.
  • The global scope contains the global names.
  • The built-in scope contains the built-in names.

customize matplotlib graph

    graph.set_title("Results of 500 slot machine pulls")
+    # Make the y-axis begin at 0
+    graph.set_ylim(bottom=0)
+    # Label the y-axis
+    graph.set_ylabel("Balance")
+    # Bonus: format the numbers on the y-axis as dollar amounts
+    # An array of the values displayed on the y-axis (150, 175, 200, etc.)
+    ticks = graph.get_yticks()
+    # Format those values into strings beginning with dollar sign
+    new_labels = ['${}'.format(int(amt)) for amt in ticks]
+    # Set the new labels
+    graph.set_yticklabels(new_labels)
+
\ No newline at end of file diff --git a/python/python-summary/index.html b/python/python-summary/index.html new file mode 100644 index 0000000..04f41d2 --- /dev/null +++ b/python/python-summary/index.html @@ -0,0 +1,178 @@ + Summary - Python code studies

Python Summary

See this good tutorial from Programiz

Python is an interpreted object-oriented & functional language. It organizes the code in modules. It uses whitespace to indent code blocks. The coding style is known as PEP8.

3.9 release Product documentation

Getting started

Start a python interpreter: python3 and start entering python code, develop a progname.py file and use python3 progname.py, or add #!/usr/bin/env python3 to make it self runnable.

The code can be structured with functions def name(): and then with or without a main part:

if __name__ == "__main__":
+

Better to use main statement when using objects and classes.

Concepts

Datatypes

  • list:

    • concat lists: a = [1,2,3,4] then a = a + [5,6] or a + list("789") -> [1,2,3,4,5,6,'7','8','9']. Lists are mutable.
    • len(a)
    • slicing: all elements except first and last: a[1:-1], all from index 3: a[3:]
    • list.append(a_record) modifies a list by adding an item to the end
    • list.pop() removes and returns the last element of a list
    • Get one object index using the list.index(object)
    • in list to assess if element in the list
    • list comprehensions:

    • squares = [n**2 for n in range(10)]

    • short_planets = [planet for planet in planets if len(planet) < 6]
  • dictionary is like json object, with key-value list. The main operations on a dictionary are storing a value with some key and extracting the value given the key. It is also possible to delete a key:value pair with del. If you store using a key that is already in use, the old value associated with that key is forgotten.

cols={}
+cols["column_name_1"] = np.random.normal(2,1,10)
+
  • A list comprehension consists of brackets containing an expression followed by a for clause, then zero or more for or if clauses. The result will be a new list resulting from evaluating the expression in the context of the for and if clauses which follow it. squares = [x**2 for x in range(10)]
  • Queues: do not use list for queue but collections.deque
from collections import deque
+    queue = deque([23,56,78,44])
+    queue.append(55)
+print(queue)
+
+> deque([23, 56, 78, 44, 55])
+twentythree=queue.popleft()
+

Tuples

tuples: a = (1,2,3,4,5) are iterable. Tuple does not support item assignment: t[3] = 5 → error.

tup1 = ('physics', 'chemistry', 1997, 2000);
+print ("tup1[0]: ", tup1[0]);
+# iteration
+for a in tup1:
+  print(a)
+

They are immutable. Need to create new tuples from existing one. Removing individual tuple elements is not possible.

Transform a tuple into array:

a=(2, 2.6496666666666666, -30.463416666666667)
+b=np.asarray(a)
+# b array([  2.        ,   2.64966667, -30.46341667])
+

String

# get the last four chars
+dirname[:len(dirname) - 4]
+# split string to get the last folder name of a path
+folders = path.split('/') 
+name= folders[len(folders)-1]
+name.lower()
+name.upper()
+name.index('substr')
+claim.startswith(planet)
+claim.endswith(planet)
+

See string tutorial

def word_search(documents, keyword):
+    """
+    Takes a list of documents (each document is a string) and a keyword. 
+    Returns list of the index values into the original list for all documents 
+    containing the keyword.
+
+    Example:
+    doc_list = ["The Learn Python Challenge Casino.", "They bought a car", "Casinoville"]
+    >>> word_search(doc_list, 'casino')
+    >>> [0]
+    """
+    # list to hold the indices of matching documents
+    indices = [] 
+    # Iterate through the indices (i) and elements (doc) of documents
+    for i, doc in enumerate(documents):
+        # Split the string doc into a list of words (according to whitespace)
+        tokens = doc.split()
+        # Make a transformed list where we 'normalize' each word to facilitate matching.
+        # Periods and commas are removed from the end of each word, and it's set to all lowercase.
+        normalized = [token.rstrip('.,').lower() for token in tokens]
+        # Is there a match? If so, update the list of matching indices.
+        if keyword.lower() in normalized:
+            indices.append(i)
+    return indices
+

Control flow

if condition: elsif condition: else

  • For statement iterates over the items of any sequence (a list or a string), in the order that they appear in the sequence
  • Range() to iterate over a sequence of numbers. (e.g. for i in range(5):). The given end point is never part of the generated sequence. It is possible to let the range start at another number, or to specify a different increment (from 0 to 10, increment 3: range(0, 10, 3)). It returns an object which returns the successive items of the desired sequence when you iterate over it
  • list(range(5)) build a list like: [0, 1, 2, 3, 4]
  • Loop statements may have an else clause; it is executed when the loop terminates through exhaustion of the list (with for) or when the condition becomes false (with while), but not when the loop is terminated by a break statement
  • The pass statement does nothing. It can be used when a statement is required syntactically but the program requires no action.

  • See Control Flow Statement Tutorial

Exception

try:
+  dosomething()
+except ValueError:
+  pass
+

Regular Expressions

How to regex

Specialize in string pattern matching from string. It is a language by itself.

import re
+p = re.compile('ab*')
+
Char Note
^ Matches the beginning of a line
$ Matches the end of the line
. Matches any character
\s Matches whitespace
\S Matches any non-whitespace character
* Repeats a character zero or more times
*? Repeats a character zero or more times (non-greedy)
+ Repeats a character one or more times
+? Repeats a character one or more times (non-greedy)
[aeiou] Matches a single character in the listed set
[^XYZ] Matches a single character not in the listed set
[a-z0-9] The set of characters can include a range
( Indicates where string extraction is to start
) Indicates where string extraction is to end
'@([^ ]*)' extracts the domain name from the email. Use () to specify what to extract, starting from the @. [^ ] matches a non-blank character
re.findall('[0-9]+',s) finds all occurrences of numbers in the string. [0-9] is one digit

Functions

Python supports OOD and functional programming like Scala. Function can be defined in a scope of a module file outside of a class, or as method of a class.

  • The keyword def introduces a function definition. It must be followed by the function name and the parenthesized list of formal parameters. The statements that form the body of the function start at the next line, and must be indented.
  • The first statement of the function body can optionally be a string literal used as docstring.
  • local variables cannot be directly assigned a value within a function (unless named in a global statement), although they may be referenced
  • The return statement returns with a value from a function. return without an expression argument returns None.
  • functions can have a variable number of arguments def ask_ok(prompt, retries=4, complaint='Yes or no, please!'):
  • Functions can also be called using keyword arguments of the form kwarg=value instead of using the positional arguments. keyword arguments must follow positional arguments. Arguments could have default value so becomes optional. Attention the default value is evaluated only once. This makes a difference when the default is a mutable object such as a list, dictionary, or instances of most classes. For example, the following function accumulates the arguments passed to it on subsequent calls
def f(a, L=[]):
+    L.append(a)
+    return L
+
+print(f(1))
+[1]
+print(f(2))
+[1,2]
+print(f(3))
+[1,2,3]
+
  • A function can be called with an arbitrary number of arguments. The syntax is def (formalArg,formalArg2,*args,kwarg=value):

  • lambda is a keyword to define an anonymous function

def make_incrementor(n):
+      return lambda x: x + n
+f = make_incrementor(42)
+f(2)
+44
+
This can be used for specifying a sort method anonymously: use the second element of a tuple to sort a list of tuples

collection.sort(key=lambda collection : collection[1])
+

variable scope in function: when there is a need to access variable defined at the module level use the keyword global

Namespaces

A namespace is a mapping from names to objects. Examples of namespaces are:

  • the set of built-in names (containing functions such as abs(), and built-in exception names), loaded when interpreter starts
  • the global names in a module, created when module is loaded, and kept until interpreter quits
  • the local names in a function invocation, created when the function is called, deleted when the function returns or raises an exception. When searching for a reference, the interpreter starts with the innermost scope (current block), then enclosing functions, then module and built-in names. It is important to realize that scopes are determined textually: the global scope of a function defined in a module is that module's namespace, no matter from where or by what alias the function is called. If no global statement is in effect, assignments to names always go into the innermost scope. Assignments do not copy data — they just bind names to objects.
# reference a non local variable in a function
+     nonlocal spam
+# or a global
+    global spam
+

Object

Python is a OOP, with polymorphism and inheritance.

A class constructor is a method declared as def __init__(self):. A destructor is def __del__(self):. The equivalent of toString is def __str__(self):.

Class definitions, like function definitions (def statements) must be executed before they have any effect. When a class definition is entered, a new namespace is created, and used as the local scope — thus, all assignments to local variables go into this new 'class' namespace.

class Complex:
+    """ Represents mathematical Complex number
+    """
+    # called once instance is created to initialize internal attributes (like java constructor)
+    def __init__(self, realpart, imagpart):
+        self.r = realpart
+        self.i = imagpart
+
+
+# creating an instance
+x=Complex(3,2)
+# attribute reference ; class.attname
+x.i
+

The only operations understood by instance objects are attribute references. There are two kinds of valid attribute names, data attributes and methods.

class MyClass(object):
+    '''
+    classdocs
+    '''
+
+    def __init__(self, p:str):
+        '''
+        Constructor
+        '''
+        self.a=p
+        self.n=1
+    def f(self):
+        return self.n+1
+
+c=MyClass('an object')
+# can add attribute dynamically into object even if class did not define it...
+c.b="a new attribute"
+print(c.a)
+# an object
+print(c.b)
+# a new attribute
+print(c.f())
+# 2
+

Clients should use data attributes with care — clients may mess up invariants maintained by the methods by stamping on their data attributes. Python supports inheritance and search for attributes is done using a depth-first, left to right approach.

class DerivedClassName(Base1, Base2, Base3):
+

There are no private instance variables inside an object. The naming convention of prefixing the name with _ means it should be treated as a non-public part of the API.

Module

A module is a file containing python definitions and statements. The filename = module name. Definitions from a module can be imported into other modules or into the main module. Be sure to take the folder hierarchy as package hierarchy. A module can contain executable statements as well as function definitions. These statements are intended to initialize the module. They are executed only the first time the module name is encountered in an import statement.

# To see the variables and function of a module
+import math
+print(dir(math))
+# give combined documentation for all the functions and values in the module 
+help(math)
+
Always import only the specific things we'll need from each module.

To make a module executable we need a main statement

if __name__ == "__main__":
+

The directory containing the script being run is placed at the beginning of the search path, ahead of the standard library path: scripts in that directory will be loaded instead of modules of the same name in the library directory. The interpreter first searches for a built-in module then it searches for a file named spam.py in a list of directories given by the variable sys.path (current directory + PYTHONPATH). To speed up loading modules, Python caches the compiled version of each module in the pycache directory under the name module.version.pyc, where the version encodes the format of the compiled file; it generally contains the Python version number.

File I/O

You open a file in different modes with open(). Files are text or binary

# first create a text file
+f = open('atext.txt','w')
+f.write('A first line\n')
+f.write('A second line\n')
+f.close()
+
+f = open('atext.txt', 'r')
+f.readline()
+# 'A first line\n'
+f.readline()
+# 'A second line\n'
+f.readline()
+# no more line is '' empty string
+
+# read all lines and build a list: 2 ways
+lines=f.readlines()
+list(f)
+# read line by line: very efficient as use limited memory. f is an iterator over the lines
+for line in f
+
+# a dictionary persisted as json in text file
+import json
+f = open('critics.txt', 'w')
+json.dump(critics,f)
+f.close()
+# reload it
+g = open( 'critics.txt','r' )
+d=json.load(g)
+print(d[' Toby'])
+

Python doesn't flush the buffer—that is, write data to the file—until it's sure you're done writing. One way to do this is to close the file. File objects contain a special pair of built-in methods: __enter__() and __exit__().

See code python-bible/readAssetFromFolder.py which uses Git client to get origin URL.

Date

See the datetime module

import datetime
+
+print ('Current date/time: {}'.format(datetime.datetime.now()))
+
+ d= datetime.date(2018,9,23)
+ d= datetime.date.today()
+ datetime.datetime.today()
+
+datetime.datetime(2019, 9, 23, 18, 34, 26, 856722)
+
+
+date_time_str = 'Jun 28 2018  7:40AM'
+date_time_obj = datetime.datetime.strptime(date_time_str, '%b %d %Y %I:%M%p')
+
+# transform to a string
+d.strftime("%Y-%m-%d %H:%M:%S")
+

Unit testing

unittest is based on Kent Beck's work on unit testing like the junit library.

  • define a module with a class which extends TestCase, use the setUp and tearDown methods to set context before each test method.

  • Add test method and use assert* to validate test results.

Consider pytest as another modern tool to do testing in python.

Reading command line arguments

import sys
+print("This is the name of the script: ", sys.argv[0])
+print("Number of arguments: ", len(sys.argv))
+print("The arguments are: " , str(sys.argv))
+

Doing HTTP requests

See code under web_data, but with python 3 the approach is to use request.

Python Flask WebApp

The project python-code includes the angular-flask folder to present some simple examples of how to use Flask with Angular.

See this note for details.

Data management

Pandas

Create a data frame with two columns

data = DataFrame({'message': [], 'class': []})
+

Create n records with timestamp from one start time:

start_time = datetime.datetime.today() 
+c=pd.date_range(start_time, periods=nb_records, freq=METRIC_FREQUENCY)
+

Transforming to string

c.strftime("%Y-%m-%d %H:%M:%S")
+

Split data into training and test sets

splitIndex = np.random.rand(len(data)) < 0.8
+train = data[splitIndex]
+test = data[~splitIndex]
+
\ No newline at end of file diff --git a/search/search_index.json b/search/search_index.json new file mode 100644 index 0000000..9cbc5af --- /dev/null +++ b/search/search_index.json @@ -0,0 +1 @@ +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Python Studies","text":"

This repository groups a bunch of python code samples from my own self-training and studies for web development, crawling, python best practice, and raspberry PI work.

"},{"location":"#language-advantages-disadvantages","title":"Language Advantages / disadvantages","text":"

Advantages:

  • Interpreted with shell to start quickly, more concise language
  • 2nd most used programming language
  • A lot of libraries, used a lot by data scientists
  • Combines functional and OOP.
  • Raspberry PI language of choice

Disadvantages:

  • Slow; does not support multi-CPU / threading architectures well
  • Not great for mobile and 3D game programming
"},{"location":"#code-in-the-order-of-knowledge-acquisition","title":"Code in the order of knowledge acquisition","text":""},{"location":"#basics","title":"Basics","text":"
  • firstinput.py for reading user input
  • Variable scope between global, local,...
  • travis.py to play with lists, for in range() and conditions
  • cinema.py to illustrate how to use for dictionary
  • Play with data structures: lists, queues, matrix, sets, and more dictionaries, with how to navigate into those structures
  • Reverse a word and add aye, use loops, break, in vowels...
  • Object Oriented Python: classes and inheritance: using constructor (init()) and method with self argument.
  • Modules, import, and packages. Do not forget to set PYTHONPATH to the root folder to access any new modules
"},{"location":"#flask","title":"Flask","text":"
  • Flask web app hello world then REST API end point with Flask and staticApp.py
  • Flask serving a simple angular App
  • TDD with Flask app and docker from testdriven.io course
"},{"location":"#algorithms","title":"Algorithms","text":"
  • Sorting arrays: Bubblesort, selection sort, insertion sort and quicksort.
  • Binary Tree with InOrderTraversal, PreOrderTraversal, PostOrderTraversal.
  • Binary search within a sorted array which is a divide and conquer algorithm.
  • Depth First Search, graph, Breadth First Search DFS: explores the highest-depth nodes first before being forced to backtrack and expand shallower nodes. BFS: explores all of the neighbor nodes at the present depth prior to moving on to the nodes at the next depth level.
"},{"location":"#graphics","title":"Graphics","text":"
  • Use a simple graphics API to create a window, draw circle and move them.
  • Plotting normal curve with matplotlib
"},{"location":"#web-scrawling","title":"Web scrawling","text":"

Use urllib and beautiful soup to remove html tags from a web page to get text to parse. See this note for guidance

  • Use regular expression (re module) to extract number from a text read from a file.
"},{"location":"#astronomy","title":"Astronomy","text":"

See detailed note here and code is under astronomy folder.

"},{"location":"#aws","title":"AWS","text":"

To get some sample code to use AWS SDK see this folder.

"},{"location":"#some-tricks","title":"Some tricks","text":"
  • placing cursor to previous line and enter will copy the line to a new line
"},{"location":"dev-env/","title":"Development environments","text":"

Apple Mac OS uses python for its own operations, so it is very important to isolate the development environment from the operation of the OS to avoid compromising the integrity of the whole system. So virtualenv can be used, but in today's world, docker and pipenv are the way to go as they:

  • avoid installing software and libraries not often used on the native OS
  • describe the dependencies on library so programs developed 5 years ago should still run
  • easy to switch laptop
  • quicker provisioning than a VM running with Vagrant, still offering mounting host folder, running under localhost
  • use docker compose for each project to manage component dependencies
  • if needed pipenv can be used to set up virtual environment to isolate dependencies

Clone this project

git clone https://github.com/jbcodeforce/python-code\ncd python-code\n
"},{"location":"dev-env/#use-the-different-docker-images","title":"Use the different docker images","text":"

The DockerfileForEnv in the current project defines an image for running python 3.7 with Flask, pytest, panda and other basic libraries.

To build the image you need docker engine and do the following

docker build -t jbcodeforce/python37 . \n

Then start the image as container with the command below, which also mount your local folder to the /home folder inside the docker container:

docker run -e DISPLAY=192.168.1.89:0 --name pythonenv -v $(pwd):/home/ -it --rm -p 5000:5000 jbcodeforce/python37 bash\n

Note

The script named startPythonDocker.sh performs this command.

The docker image includes pipenv for improving the dependency management.

The other Dockerfile for astrophysic is Here

"},{"location":"dev-env/#use-pipenv","title":"Use pipenv","text":"

Pipenv offers the latest best practices from other languages to manage virtual environments and dependencies for Python. Adding and removing packages also updates the dependencies descriptor file: Pipfile. It basically combines pip and virtualenv. It helps address the build inconsistency that requirements.txt brings.

To install it on the mac:

brew install pipenv\n

When using the docker image you do not need to install pipenv on the host. It is also available in the docker image so the following commands should work from the bash inside the docker container.

# Create a new project using Python 3.7\npipenv --python 3.7\n# start the virtual env shell\npipenv shell\n# or start a python interpreter\npipenv run python\n# or run a program with python interpreter\npipenv run python FaustEasiestApp.py\n# install dependencies including dev\npipenv install --dev\n#Check your installed dependencies for security vulnerabilities:\npipenv check\n# print a pretty graph of all your installed dependencies.\npipenv graph # Uninstalls all packages not specified in Pipfile.lock.\npipenv clean\n# lock dependencies\npipenv lock\n
"},{"location":"dev-env/#run-the-python-interpreter","title":"Run the python interpreter","text":"

Start python in the container shell:

root@485cff8245df:/home#  python\nPython 3.7.4 (default, Jul  9 2019, 00:06:43) [GCC 6.3.0 20170516] on linux\n>>> 

Use exit() to get out of the python interpreter, and Ctrl D for getting out of the Docker container.

"},{"location":"dev-env/#using-graphics-inside-the-python-container","title":"Using graphics inside the python container","text":"

The approach is to run graphics program inside python interpreter, but the windows will appear on the host machine (the Mac). To do so we need a bidirectional communication between the docker container and the Mac. This is supported by the socat tool. To install it the first time do the following:

brew install socat\n

When installed, open a new terminal and start socat with the command:

socat TCP-LISTEN:6000,reuseaddr,fork UNIX-CLIENT:\\\"$DISPLAY\\\"\n

As the container is running X window system, we need to also install a X system on Mac. This is done via the Xquartz program:

brew install xquartz\n

Then start Xquartz from the application or using

open -a Xquartz\n

A white terminal window will pop up. The first time Xquartz is started, open up the preferences menu and go to the security tab. Then select \u201callow connections from network clients\u201d to check it on.

See this note from Nils De Moor for more information.

"},{"location":"astronomy/","title":"Python programming for astronomy","text":"

The dockerfile defines a python env with astropy, matplotlib, numpy,scipy...

To build and run it:

docker build -t quay.io/jbcodeforce/mypythonplay .\ndocker run -ti quay.io/jbcodeforce/mypythonplay bash\n
"},{"location":"astronomy/#pulsars","title":"Pulsars","text":"

The goal is to answer: how many pulsars are detected in images taken with the Murchison Widefield Array telescope? The array telescope detects radio emission at frequencies between 80 and 300 megahertz. It has a very large field of view, which means it's great for doing large survey projects. Images use a grayscale to represent the flux density of emission from astronomical objects. Black is high flux density and gray is the background noise. Radio frequencies don't have color. These color maps are just used to accentuate different aspects of the intensity scale.

In radio astronomy, flux density is measured in units of Janskys, which is equivalent to 10 to the power of -26 watts per square meter per hertz.

)

Astronomy images are usually stored in a file format called FITS, and to view them you can download software like DS9 or use an online tool like Aladin.

We typically call something a detection if the flux density is more than five standard deviations higher than the noise in the local region.

To search from non-detection, a special approach is used called Stacking which measures the statistical properties of a population we can't detect. Stacking works because the noise in a radio image is roughly random, with a Gaussian distribution centered on zero. When you add regions of an image that just have noise, the random numbers cancel out. But when you add regions of an image in which there are signals, the signals add together, increasing what we call the signal to noise ratio.

Introduction to Pulsars (from CSIRO) Hobbs, M. (n.d.) from http://www.atnf.csiro.au/outreach/education/everyone/pulsars/index.html

Pulsar Properties (from NRAO, advanced) National Radio Astronomy Observatory. (2010) from http://www.cv.nrao.edu/course/astr534/Pulsars.html

"},{"location":"astronomy/#calculating-the-mean-median-stack-of-a-set-of-fits-images","title":"Calculating the mean / median stack of a set of FITS images","text":"

In Flexible Image Transport System (FITS) the image is stored in a numerical array, which we can load into a NumPy array. Opening a FITS file in astropy returns a HDU (Header/Data Unit) list. Each HDU stores headers and (optionally) image data. Here is a program to find the point in the image with the maximum intensity:

from astropy.io import fits\ndef search_brightest_pixel(fname):\n  hdulist = fits.open(fname)\n  data = hdulist[0].data\n  nb_row,nb_col = data.shape\n  max = 0\n  x , y = (0,0)\n  for r in range(0,nb_row):\n    for c in range(0,nb_col):\n      if data[r][c] > max :\n          x = r\n          y = c\n          max = data[r][c]\n  return x,y\n

A better approach is to use the median (the middle of the sorted data set), as the mean is easily skewed by outliers. But getting median could get computational intensive and consuming a lot of memory as calculating the median requires all the data to be in memory at once. This is an issue in typical astrophysics calculations, which may use hundreds of thousands of FITS files. To compute the median we can use the statistics library, or the following approach:

fluxes = [17.3, 70.1, 22.3, 16.2, 20.7]\nfluxes.sort()\nmid = len(fluxes)//2\nmedian = fluxes[mid]\n# or for an even number of elements\nmedian = (fluxes[mid - 1] + fluxes[mid])/2\n

or using numpy:

data = load_stack(fnames)\nstack = np.dstack(data)\nmedian = np.median(stack, axis=2)\n

To avoid loading all the data in memory, we can use the binapprox algorithm to approximate the current median. The idea behind it is to find the median from the data's histogram. Starting from the left, we sum up the counts in the histogram bins until we get to just over half the total number of values; then we know the last bin we added must have contained the median. In fact it is better to search in bins within one standard deviation of the mean. See Stacking/binapprox.py code.

"},{"location":"astronomy/#agn-active-galactic-nucleus","title":"AGN: Active Galactic Nucleus","text":"

Our eyes can only detect light and the visible part of the electromagnetic spectrum. Galaxy has Xray, visible and radio waves. At the center of the galaxy is a black hole, which has a huge impact on the galaxy's growth and formation. In cases where there is a lot of gas in the central region of the galaxy, this material can be accreted on to the black hole via an Accretion Disk, releasing a lot of energy in the process. This is what we call, an Active Galactic Nucleus. The radiation produced by the AGN is so bright that it can outshine the entire galaxy, producing far more energy than all of the galaxy's stars combined. It may form huge jets of strong magnetic fields emanating out from around the black hole. Here is an image of combined wave length from visible, X-ray and radio

The material that accretes onto a black hole produces X-rays, because particles become very hot. We can assess the presence of a supermassive black hole by measuring powerful jets coming from a compact core, rapid changes in the luminosity of the galaxy nucleus, very high speed orbital motions of stars in the galactic nucleus.

"},{"location":"astronomy/#cross-matching","title":"Cross-matching","text":"

When investigating astronomical objects, like active galactic nuclei (AGN), astronomers compare data about those objects from different telescopes at different wavelengths. This requires positional cross-matching to find the closest counterpart within a given radius on the sky.

To create a catalog of objects from survey images, the source-finding software uses the same technique of going through all the pixels and finding peaks that are statistically significant.

How to calculate distance in the sky? Two objects in the same image are not in the same plane; we can compute the angular distance, but they may be far apart along that line of sight.

The cross matching between 2 catalogs: The BSS catalogue lists the brightest sources from the AT20G radio survey while the SuperCOSMOS catalogue lists galaxies observed by visible light surveys.

The positions of stars, galaxies and other astronomical objects are usually recorded in either equatorial or Galactic coordinates.

  • Right ascension: the angle from the vernal equinox to the point, going east along the celestial equator. Given in hours-minutes-seconds (HMS). 1 hour = 15 degrees
  • Declination: the angle from the celestial equator to the point, going north (negative values indicate going south). Recorded in degrees-minutes-seconds (DMS) notation. A full circle is 360 degrees, each degree has 60 arcminutes and each arcminute has 60 arcseconds.

The vernal equinox is the intersection of the celestial equator and the ecliptic where the ecliptic rises above the celestial equator going further east.

To crossmatch two catalogues we need to compare the angular distance between objects on the celestial sphere, which is the projected angle between objects as seen from Earth.

See cross-matching.py code for in-place comments and study. But this program is in O(n*m); there is an Astropy library with cross matching, using a k-d tree, as demonstrated in this code.

"},{"location":"astronomy/#statistic-data-science-helps-astronomy","title":"Statistic / data science helps Astronomy","text":"

Data cannot always answer directly what you want to find, so we can use probability theory to assess whether the data provide an answer. The approach is to assert a hypothesis and derive what kind of data we should expect to see. Then you use the model-fitting approach by selecting the hypothesis that best fits the data and throwing away the ones that don't fit the data. 2016 set a record for the biggest haul of exoplanets, when the Kepler team applied statistical validation to verify over a thousand new planets.

"},{"location":"astronomy/#exoplanets","title":"Exoplanets","text":"

The science of exoplanets kicked off back in the late 1990s, the success of the space telescopes CoRoT and Kepler has really accelerated the field. Back in the 90s, we were discovering one or two planets a year, on average.

Kepler helps discover hundreds of new planets are being confirmed every year, with thousands more candidates being found.

The most common planets are the super earth. The NASA public catalog. Here are some of the helpful attributes

Attribute Description Kepler ID Unique target identification number for stars KOI name String identifier for the Kepler Object of Interest (KOI) Teff (K) Effective temperature of a star in Kelvin Radius Radius of stars and planets in units of solar radius/earth radius respectively Kepler name Unique string identifier for a confirmed exoplanet in the planet table Period Orbital period of a planet in units of days Status Status of a discovered KOI in the planet table, e.g. \"confirmed\" or \"false positive\" Teq Equilibrium temperature of a planet in Kelvin

Some interesting queries:

SELECT koi_name, radius FROM Planet ORDER BY radius DESC LIMIT 5;\n# analyse the size of the unconfirmed exoplanets (kepler_name is null).\nSELECT MIN(radius), MAX(radius), AVG(radius), STDDEV(radius) FROM Planet where kepler_name is NULL;\n# how many planets in the Planet database are in a multi-planet system\nselect kepler_id, count(koi_name) from Planet group by kepler_id having count(koi_name) > 1 order by count(koi_name) desc;\n

Which Earth sized planets are in the inhabitable zone of the host star?

To work out which planets are in the habitable zone, we'll consider the energy budget of a planet. How much energy it receives from its star versus how much it radiates back into space. The intensity of the energy decrease the further the planet is from its star. The incoming energy budget of the planet clearly depends on the brightness of its star, and how close the planet is to that star.

The insolation flux for Earth is 1361 W/m2

"},{"location":"aws/","title":"AWS","text":""},{"location":"aws/#boto3-library","title":"boto3 library","text":"

A unique library to access all AWS services from a python app.

"},{"location":"aws/#installation","title":"Installation","text":"
pip install boto3[crt]\n

Set up authentication credentials for your AWS account using either the IAM Console or the AWS CLI.

aws configure\n# Verify access\naws iam list-users\n

Info

The jbcodeforce/python docker image has the aws cli and boto3.

"},{"location":"aws/#programming-samples","title":"Programming samples","text":""},{"location":"aws/#access-s3","title":"Access S3","text":"
import boto3\n# declare a resource handle to the service you want\ns3 = boto3.resource(\"s3\")\n# use SDK API for s3.\ns3.buckets.all()\n
"},{"location":"aws/#access-dynamodb","title":"Access DynamoDB","text":"

The client can get the table name using the API client:

import os, boto3\n\nAWS_ACCESS_KEY_ID=os.environ.get(\"AWS_ACCESS_KEY_ID\")\nAWS_SECRET_ACCESS_KEY=os.environ.get(\"AWS_SECRET_ACCESS_KEY\")\n\nclient = boto3.client(\n    'dynamodb',\n    aws_access_key_id=AWS_ACCESS_KEY_ID,\n    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,\n    )\ntable = client.list_tables()\ntableName=table['TableNames'][0]\n

Then use the dynamoDB API:

dynamodb = boto3.resource(\n    'dynamodb',\n    aws_access_key_id=AWS_ACCESS_KEY_ID,\n    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,\n    )\n\norderTable = dynamodb.Table(tableName)\norderTable.put_item(\n   Item={\n     \"orderID\": \"ORD001\",\n     \"customerID\": \"C01\", \n     \"productID\": \"P01\", \n     \"quantity\": 10,  \n     \"destinationAddress\": { \"street\": \"1st main street\", \"city\": \"Santa Clara\", \"country\": \"USA\", \"state\": \"CA\", \"zipcode\": \"95051\" }\n   })\n
  • Run it once the python virtual env is enabled with python dynamoClient.py
"},{"location":"aws/#cdk-with-python","title":"CDK with python","text":"

Separate note in AWS_Studies.

"},{"location":"flask/flask-tdd-docker/","title":"Flask microservice with TDD and docker","text":"

This content is based on the tutorial from testdriven.io and covers:

  • pipenv for virtual environment and dependencies management
  • Flask Restful where resources are build on top of Flask views
  • Flask CLI tool to run and manage the app from the command line.
  • Debugging in development mode
  • Docker for python developer
  • Flask-sqlalchemy to support SQLAlchemy in Flask
  • Psycopg is the most popular PostgreSQL adapter for Python. Here is a quick summary of things learnt.
  • Postgresql docker image
  • Pytest for unit and functional testing
  • Blueprints for organizing code and components

The folder Flask/flask-tdd-docker includes the training code.

"},{"location":"flask/flask-tdd-docker/#set-virtual-env","title":"Set virtual env","text":"

The old way to define virtual environment was to use the following approach:

python3.7 -m venv env\nsource env/bin/activate\n# play with python ...\n# Stop with:\ndeactivate\n

As of today, the approach is to use pipenv, where you update the project and development dependencies in a Pipfile.

pipenv --python 3.7\n# start the virtual env\npipenv shell\npipenv install --dev\n

Freeze the dependencies:

pipenv lock -r > requirements.txt\n
"},{"location":"flask/flask-tdd-docker/#define-and-run-the-flask-app","title":"Define and run the flask app","text":"

Define a manage.py to represent the app, and use the Flask CLI shell to manage the app from command line:

from flask.cli import FlaskGroup\nfrom project import app\n\n\ncli = FlaskGroup(app)\n\n\nif __name__ == '__main__':\n    cli()\n
export FLASK_APP=project/__init__.py\n# use the Flask CLI from inside the app itself\npython manage.py run\n

Run in development mode for debugging.

export FLASK_ENV=development\npython manage.py run\n* Serving Flask app \"project/__init__.py\" (lazy loading)\n* Environment: development\n* Debug mode: on\n

With the Flask shell we can explore the data in the application:

flask shell\n
"},{"location":"flask/flask-tdd-docker/#using-docker-and-docker-compose","title":"Using docker and docker compose","text":"

The dockerfile uses alpine linux and non root user. The docker compose uses volume to mount the code into the container. This is a must for a development environment in order to update the container whenever a change to the source code is made. Then build the image using docker compose.

docker-compose build\n# then start in detached mode\ndocker-compose up -d\n# Rebuild the docker images \ndocker-compose up -d --build\n# Access app logs\ndocker-compose logs\n# Access to a python shell to control the flask app\ndocker-compose exec users flask shell\n
"},{"location":"flask/flask-tdd-docker/#add-persistence-on-postgresql-and-use-sqlalchemy","title":"Add persistence on Postgresql and use SQLAlchemy","text":"

To initialize the postgresql copy a sql file under /docker-entrypoint-initdb.d (creating the directory if necessary).

docker compose section for postgresql:

users-db:  build:\ncontext: ./project/db\ndockerfile: Dockerfile\nexpose:\n- 5432\nenvironment:\n- POSTGRES_USER=postgres\n- POSTGRES_PASSWORD=postgres\n

Once spun up, PostgreSQL will be available on port 5432. Be sure to include dependencies in the app dockerfile

# install dependencies\nRUN apk update && \\\napk add --virtual build-deps gcc python-dev musl-dev && \\\napk add postgresql-dev && \\\napk add netcat-openbsd && \\\npip install --upgrade pip && \\\npip install --upgrade --user pipenv 

Also to avoid having the application getting error because it could not contact the database add a entrypoint.sh shell to loop until the database is accessible before starting the python app.

To access psql, use the following docker compose command

docker-compose exec users-db psql -U postgres\n\npsql (11.4)\nType \"help\" for help.\n\npostgres=# \\c users_dev\nYou are now connected to database \"users_dev\" as user \"postgres\".\nusers_dev=# \\dt\nDid not find any relations.\nusers_dev=# \\q\n

In the manage.py file, register a new flask CLI command, recreate_db, so that we can run it from the command line like:

docker-compose exec users python manage.py recreate_db\n
# @cli.command('recreate_db')\ndef recreate_db():\n    db.drop_all()\n    db.create_all()\n    db.session.commit()\n
"},{"location":"flask/flask-tdd-docker/#add-tests-with-pytest","title":"Add tests with pytest","text":"

While unittest requires test classes, Pytest just requires functions to get up and running.

Define fixtures as reusable elements for future tests

They have a scope associated with them, which indicates how often the fixture is invoked:

  • function - once per test function
  • class - once per test class
  • module - once per test module
  • session - once per test session

Some fixture execution guidance

Define python script using 'test_' or '_test.py'. Here is an example of functional testing:

def test_ping(test_app):\n    client = test_app.test_client()\n    resp = client.get('ping')\n    data = json.loads(resp.data.decode())\n    assert resp.status_code == 200\n    assert 'pong' in data['message']\n    assert 'success' in data['status']\n

Execute test with pytest: pytest project/tests/ or with docker compose

docker-compose exec users pytest \"project/tests\"\n
"},{"location":"flask/flask-tdd-docker/#test-coverage","title":"Test coverage","text":"

Coverage.py is a popular tool for measuring code coverage in Python-based applications. Now, since we're using Pytest, we'll integrate Coverage.py with Pytest using pytest-cov. In Pipfile add pytest-cov = \">=2.7.1\" then do pipenv install

Then once the image is rebuilt, run the following command to assess the test coverage:

docker-compose exec users pytest \"project/tests\" -p no:warnings --cov=\"project\"\n# or using html page\ndocker-compose exec users pytest \"project/tests\" -p no:warnings --cov=\"project\" --cov-report html\n

Remember: just because you have 100% test coverage doesn\u2019t mean you're testing the right things

"},{"location":"flask/flask-tdd-docker/#code-quality","title":"Code quality","text":"

Linting is the process of checking your code for stylistic or programming errors. Although there are a number of commonly used linters for Python, we'll use Flake8 since it combines two other popular linters -- pep8 and pyflakes.

In Pipfile add flake8 = \">=3.7.8\", do a pipenv install then freeze the dependencies with pipenv lock -r > requirements.txt, then rebuild the docker image and run flake8:

 docker-compose exec users flake8 project\n

Black helps to format code and apply code formatting:

# check\ndocker-compose exec users black project --check\n# see the propose changes\ndocker-compose exec users black project --diff\n# apply the change\ndocker-compose exec users black project\n
"},{"location":"flask/flask-tdd-docker/#add-blueprints-template","title":"Add Blueprints template","text":"

Blueprints are self-contained components, used for encapsulating code, templates, and static files. They are apps within the app. For example REST resource can be defined in Blueprint.

For example to add an api and a resource, define a new py file, and create a blueprint instance:

users_blueprint = Blueprint('users', __name__)\napi = Api(users_blueprint)\n

Then define a class with functions to support the expected Resource, and add this class to a url to the api.

class UsersList(Resource):\n    def get(self):\n        ...\n    def post(self):\n        ...\napi.add_resource(UsersList, '/users')\n

Finally register the resource to the flask application:

    from project.api.users import users_blueprint\n    app.register_blueprint(users_blueprint)\n

See the code in users.py and init.py

Factory to create an app needs to be named create_app.

"},{"location":"flask/flask-tdd-docker/#adding-admin-and-model-view","title":"Adding admin and model view","text":""},{"location":"flask/flask-tdd-docker/#production-deployment-with-gunicorn","title":"Production deployment with gunicorn","text":"

Create a specific Dockerfile.prod and set the environment variable to run Flask in production mode and use gunicorn as container, and run under a user that is not root.

ENV FLASK_ENV production\nENV APP_SETTINGS project.config.ProductionConfig\n\n# add and run as non-root user\nRUN adduser -D myuser\nUSER myuser\n\n# run gunicorn\nCMD gunicorn --bind 0.0.0.0:$PORT manage:app\n
"},{"location":"flask/flask-tdd-docker/#heroku","title":"Heroku","text":"

Using heroku CLI.

$ heroku login\n# create a app\n$ heroku create \nCreating app... done, murmuring-shore-37331\nhttps://murmuring-shore-37331.herokuapp.com/ | https://git.heroku.com/murmuring-shore-37331.git\n\n# login to docker private registry\n$ heroku container:login\n\n# create a postgresql with the hobby-dev plan\n$ heroku addons:create heroku-postgresql:hobby-dev --app murmuring-shore-37331\n\nCreating heroku-postgresql:hobby-dev on murmuring-shore-37331... free\nDatabase has been created and is available\n ! This database is empty. If upgrading, you can transfer\n ! data from another database with pg:copy\nCreated postgresql-horizontal-04149 as DATABASE_URL\nUse heroku addons:docs heroku-postgresql to view documentation\n\n# Get database URL\nheroku config:get DATABASE_URL --app murmuring-shore-37331\n

The containers used at Heroku are called \u201cdynos.\u201d Dynos are isolated, virtualized Linux containers that are designed to execute code based on a user-specified command.

To build an image for the docker private registry, using the web dyno, that is free.

$ docker build -f Dockerfile.prod -t registry.heroku.com/murmuring-shore-37331/web .\n# publish\n$ docker push registry.heroku.com/murmuring-shore-37331/web:latest\n# test locally\n$ docker run --name flask-tdd -e \"PORT=8765\" -p 5002:8765 registry.heroku.com/murmuring-shore-37331/web:latest\n\n# Release the image, meaning the app will be based on the container image\n$ heroku container:release web --app murmuring-shore-37331\nReleasing images web to murmuring-shore-37331... done\n

Once the image is \"released\", the app is accessible via https://murmuring-shore-37331.herokuapp.com/ping

Access to logs: heroku logs --app murmuring-shore-37331

The users are not yet created, so we can run the CLI heroku run:

# create DB\nheroku run python manage.py recreate_db --app murmuring-shore-37331\n# populate the data\nheroku run python manage.py seed_db --app murmuring-shore-37331\n# Access the database with psql: \n# 1. start a local docker postgresql with psql\ndocker run -ti postgresql bash\n> psql postgres://....\n> PSQL:\n
"},{"location":"flask/readme/","title":"Python Flask Studies","text":"

The most complete starter code is from the Flask TDD tutorial and using docker. But I have incremental apps, to make it simpler to develop an app from scratch.

"},{"location":"flask/readme/#some-concepts","title":"Some concepts","text":"

Flask app takes care of dispatching requests to views and routes.

"},{"location":"flask/readme/#samples","title":"Samples","text":"

To use a boiler plate code with Flask, Blueprint, Swagger, Prometheus see the boiler plate folder.

"},{"location":"flask/readme/#the-simplest-flask-app","title":"The simplest Flask app","text":"

The simplest Flask app is presented in the quickstart and the matching code is under Flask/firstApp/firstApp.py. To execute it in your python environment:

cd Flask/firstApp\n# start docker image for dev environment\ndocker run -ti -v $(pwd):/app -p 5000:5000 jbcodeforce/python37 bash\n# Can run it with python - it will start in debug mode\npython firstApp.py\n# Or run it with flask CLI\nexport FLASK_APP=firstApp.py\nflask run --host=0.0.0.0\n * Serving Flask app \"firstApp\"\n* Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)\n127.0.0.1 - - [17/Dec/2018 20:49:42] \"GET / HTTP/1.1\" 200 -\n

The FLASK_APP environment variable is the name of the module to import at flask run.

To make the server publicly available simply by adding --host=0.0.0.0 to the command: flask run --host=0.0.0.0

If we want to run it in debug mode then any change to the code reload itself. To do so use:

export FLASK_ENV=development\nflask run --host=0.0.0.0\n

The next step is to use gunicorn to run it on top of a WSGI server, so in the docker container add:

gunicorn -w 4 -b 0.0.0.0:5000 firstApp:app\n

Which is the command in the dockerfile under the firstApp folder:

 docker build -t jbcodeforce/firstApp .\n

Start the image with

docker run --name firstApp --rm -p 5000:5000 jbcodeforce/firstApp\n
"},{"location":"flask/readme/#serving-static-pages","title":"Serving static pages","text":"

Add a folder named static at the same level as app to start. The staticApp.py demonstrates the routing specified and the api to send the file.

from flask import Flask\napp = Flask(__name__)\n\n\n@app.route('/')\ndef root():\n    return app.send_static_file('404.html')\n\n\nif __name__ == \"__main__\":\n    app.run(debug=True,host='0.0.0.0')\n

and the execution:

export FLASK_APP=staticApp.py\nflask run\n * Serving Flask app \"staticApp\"\n* Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)\n127.0.0.1 - - [17/Dec/2018 21:29:00] \"GET / HTTP/1.1\" 200 -\n
"},{"location":"flask/readme/#a-rest-api","title":"A REST api","text":"

The route decorator is used to bind function to a URL. You can add variables and converter. The firstRESTApp.py illustrates the different patterns. The important modules to import are:

from flask import Flask, url_for, request, json\n

Accessing the HTTP headers is done using the request.headers dictionary (\"dictionary-like object\") and the request data using the request.data string.

A second nice module is Flask Restful. We can declare Resource class and use the API to link the resource to an URL.

The following code illustrates the resource class, with an argument passed at the constructor level to inject it into the resource. In this case this is a Kafka consumer which includes a map of the message read. The class is using the Blueprint module to simplify the management of resource:

# code of the resource.py\nfrom flask_restful import Resource, Api\nfrom flask import Blueprint\n\ndata_inventory_blueprint = Blueprint(\"data_inventory\", __name__)\ninventoryApi = Api(data_inventory_blueprint)\n\nclass DataInventory(Resource):  \n\n    def __init__(self, consumer):\n        self.consumer = consumer\n\n    # Returns the Inventory data in JSON format\n    @track_requests\n    @swag_from('data_inventory.yml')\n    def get(self):\n        logging.debug('[DataInventoryResource] - calling /api/v1/data/inventory endpoint')\n        return self.consumer.getAllLotInventory(),200, {'Content-Type' : 'application/json'}\n

The app.py that uses this resource accesses the API and its add_resource method to define the resource class, the URL, and any arguments to pass to the resource constructor.

from server.api.inventoryResource import data_inventory_blueprint, inventoryApi, DataInventory\n\n\napp = Flask(__name__)\n\ninventory_consumer = InventoryConsumer()\ninventoryApi.add_resource(DataInventory, \"/api/v1/data/inventory\",resource_class_kwargs={'consumer':inventory_consumer})\n

Flask REST API article

"},{"location":"flask/readme/#an-angular-app","title":"An Angular app","text":"

See this repository for a more complete example of Angular development with Flask.

"},{"location":"flask/readme/#flask-tdd-docker","title":"Flask TDD Docker","text":"

See this dedicated note

"},{"location":"flask/readme/#flask-with-dynamodb-ecr-fargate","title":"Flask with DynamoDB, ECR, Fargate","text":"

See Code and Readme

"},{"location":"flask/readme/#flask-blueprint","title":"Flask Blueprint","text":"

Helps to structure the application in reusable components. To use in any Flask Blueprint, you have to import it and then register it in the application using register_blueprint(). A blueprint is an object that works like a flask app too. See the boiler plate example.

"},{"location":"pandas/","title":"Pandas summary","text":"

See the Kaggle quick tutorial on pandas.

Pandas has two core objects: the DataFrame and the Series.

DataFrame is like a table which contains an array of individual entries, each of which has a certain value. Each entry corresponds to a row (or record) and a column.

Series is a sequence of data values, it may be a single column of a DataFrame.

Here is a quick summary of some of the tutorial content:

# use pandas\nimport pandas as pd\n# Create a data frame from a dictionary whose keys are the column names and values are list of entries\npd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], 'Sue': ['Pretty good.', 'Bland.']})\n# with row header as index\npd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], \n              'Sue': ['Pretty good.', 'Bland.']},\n             index=['Product A', 'Product B'])\n# read from file\nhome_data = pd.read_csv(a_file_path,index_col=0)\n# get row / columns size\nhome_data.shape\nhome_data.head()\n# Series\npd.Series([\"4 cups\", \"1 cup\", \"2 large\", \"1 can\"],index=[\"Flour\", \"Milk\", \"Eggs\", \"Spam\"],name=\"Dinner\")\n
"},{"location":"pandas/#indexing","title":"Indexing","text":"

Pandas uses two approaches:

  • index-based selection: To select first row of a data frame: reviews.iloc[0]. To get a column with iloc use: reviews.iloc[:, 0]. We can select row too, like getting the last five elements of the dataset: reviews.iloc[-5:]
  • label-based selection. gets from data index value, not its position: reviews.loc[0, 'country']. Or select three columns and all rows: reviews.loc[:, ['taster_name', 'taster_twitter_handle', 'points']]

Label-based selection derives its power from the labels in the index. We can set index with:

reviews.set_index(\"title\")\n\n# Select elements that match condition \nreviews.loc[reviews.country == 'Italy']\n# or within a list\nreviews.loc[reviews.country.isin(['Italy', 'France'])]\n# not empty cell\nreviews.loc[reviews.price.notnull()]\n# get specific rows\nsample_reviews = reviews.iloc[[1,2,3,5,8],:]\n# Combining conditions\ntop_oceania_wines = reviews.loc[reviews.country.isin(['New Zealand','Australia']) & (reviews.points >= 95)]\n
# Get high-level summary of the attributes of the given column\nreviews.points.describe()\n# Get unique elements in a column\nreviews.taster_name.unique()\n# To see a list of unique values and how often they occur in the dataset\nreviews.taster_name.value_counts()\n# Ex bargain_wine with the title of the wine with the highest points-to-price ratio in the dataset.\nbargain_idx = (reviews.points / reviews.price).idxmax()\nbargain_wine = reviews.loc[bargain_idx, 'title']\n
"},{"location":"pandas/#map","title":"map","text":"

Map() takes one set of values and \"maps\" them to another set of values. map() should expect a single value from the Series and return a transformed version of that value.

review_points_mean = reviews.points.mean()\nreviews.points.map(lambda p: p - review_points_mean)\n# build a series to count how many times each of tropical, fruity words appears in the description column in the dataset.\ntopic= reviews.description.map(lambda d: \"tropical\" in d).sum()\nfruit= reviews.description.map(lambda d: \"fruity\" in d).sum()\ndescriptor_counts = pd.Series([topic,fruit], index=['tropical', 'fruity'])\n

apply() transforms a whole DataFrame by calling a custom method on each row.

Both methods don't modify the original data they're called on.

"},{"location":"pandas/#grouping","title":"Grouping","text":"

Use groupby() to group our data, and then do something specific to the group the data is in:

# Group of reviews which allotted the same point values to the given wines. \n# Then, for each of these groups, grab the `points` column and count how many times it appeared\nreviews.groupby('points').points.count()\n# Apply transformation to the new dataframe\nreviews.groupby('winery').apply(lambda df: df.title.iloc[0])\n

agg() lets us run a bunch of different functions on our DataFrame simultaneously

reviews.groupby(['country']).price.agg([len, min, max])\n

groupby combined with some specific operations may create multi-indexes, which looks like a tiered structure.

countries_reviewed = reviews.groupby(['country', 'province']).description.agg([len])\n# sort by ascending\ncountries_reviewed.sort_values(by='len', ascending=False)\n# To sort by index values, use the companion method sort_index()\n# sort by more than one column\ncountries_reviewed.sort_values(by=['country', 'len'])\n# most wine reviewer, using their twitter name\nreviews.groupby(['taster_twitter_handle']).taster_twitter_handle.count()\n# What is the best wine I can buy for a given amount of money? \nbest_rating_per_price = reviews.groupby('price').points.max().sort_index()\n# What are the minimum and maximum prices for each variety of wine? \nprice_extremes = reviews.groupby('variety').price.agg([min,max])\n# What are the most expensive wine varieties?\nsorted_varieties = reviews.groupby('variety').price.agg([min,max]).sort_values(by=['min','max'],ascending=False)\n# A series with index is reviewers and whose values is the average review score given out by that reviewer.\nreviewer_mean_ratings = reviews.groupby(['taster_name']).points.agg('mean')\n
"},{"location":"pandas/#data-types","title":"Data types","text":"
# Get data type of the points column\nreviews.points.dtype\n# All column types\nreviews.dtypes\n# convert to another type, when it makes sense\nreviews.points.astype('float64')\n
"},{"location":"pandas/#missing-values","title":"Missing values","text":"

Entries missing values are given the value NaN, short for \"Not a Number\".

reviews[pd.isnull(reviews.country)]\n# How many reviews in the dataset are missing a price?\nmissing_prices = reviews[reviews.price.isnull()]\nn_missing_prices = len(missing_prices)\n\n# What are the most common wine-producing regions? \n# Series counting the number of times each value occurs in the region_1 field. \n# This field is often missing data, so replace missing values with Unknown. \n# Sort in descending order.\nreviews.region_1.fillna('Unknown').value_counts().sort_values(ascending=False)\n\n# Replace a value by another\nreviews.taster_twitter_handle.replace(\"@kerinokeefe\", \"@kerino\")\n
"},{"location":"pandas/#renaming","title":"Renaming","text":"

rename() lets you rename index or column values by specifying a index or column keyword parameter, respectively.

reviews.rename(columns={'points': 'score'})\n\nreviews.rename(index={0: 'firstEntry', 1: 'secondEntry'})\n\n# Rename index\nreindexed = reviews.rename_axis('wines',axis='rows')\n
"},{"location":"pandas/#combining","title":"Combining","text":"

Pandas has three core methods for doing combining data frames. In order of increasing complexity, these are concat(), join(), and merge().

join() lets you combine different DataFrame objects which have an index in common

left = canadian_youtube.set_index(['title', 'trending_date'])\nright = british_youtube.set_index(['title', 'trending_date'])\n\nleft.join(right, lsuffix='_CAN', rsuffix='_UK')\n

The lsuffix and rsuffix parameters are necessary here because the data has the same column names in both British and Canadian datasets.

"},{"location":"python/compendium/","title":"Python readings","text":"
  • 3.8 release Product documentation
  • Python software foundation tutorial
  • Tutorial from Programiz
  • Using Docker For Python Web Development
  • The Flask Mega-Tutorial
  • Improve environment for Flask webapp
  • 10 Steps to Set Up Your Python Project for Success
  • Kafka Python
  • Python 2.0 Quick Reference: old web site
  • Getting started with pymongo a mongodb driver
  • pymongo documentation

Statistics

  • The Statistics and Calculus with Python Workshop
"},{"location":"python/faq/","title":"Python FAQ","text":""},{"location":"python/faq/#why-pipenv","title":"Why pipenv","text":"

pipenv resolves the problem of dependency management, which is not perfectly handled by requirements.txt and leads to a non-deterministic build process. Given the same input (the requirements.txt file), pip doesn\u2019t always produce the same environment. pip freeze helps to freeze the dependencies and update your requirements.txt. But any dependency change needs to be done manually, and you need to track the dependent package versions for bug fixes or mandatory security fixes.

A second problem is the system-wide repository used by pip. When developing multiple different projects in parallel, that could become a real issue. pipenv uses a per-project environment. pipenv acts as pip + virtual environment. It uses Pipfile to replace requirements.txt and Pipfile.lock for deterministic builds. See this guide for command examples.

"},{"location":"python/faq/#how-to-get-program-arguments","title":"How to get program arguments?","text":"
  import sys,getopt\n  USER=\"jbcodeforce\"\n  FILE=\"./data/export-questions.json\"\n  try:\n    opts, args = getopt.getopt(argv,\"hi:u:\",[\"inputfile=\",\"user=\"])\n  except getopt.GetoptError:\n    print(usage())\n    sys.exit(2)\n\n\n  for opt, arg in opts:\n    if opt == '-h':\n      usage()\n      sys.exit()\n    elif opt in (\"-u\", \"--user\"):\n      USER = arg\n    elif opt in (\"-i\", \"--inputfile\"):\n      FILE = arg\n
"},{"location":"python/faq/#using-arg_parser","title":"Using arg_parser","text":"
import argparse\nparser = argparse.ArgumentParser()\nparser.add_argument(\"filename\", help=\"Must specify a file name\")\nparser.add_argument(\"--append\", help=\"Append records to existing file\",action=\"store_true\")\nargs = parser.parse_args()\nif args.append:\n    print(\"append to file\")\n
"},{"location":"python/faq/#what-__init__py-under-folder-used-for","title":"what __init__.py under folder used for?","text":"

The __init__.py file makes Python treat directories containing it as modules. Furthermore, this is the first file to be loaded in a module, so you can use it to execute code that you want to run each time a module is loaded, or specify the submodules to be exported.

"},{"location":"python/faq/#how-to-get-program-dependencies-generated","title":"How to get program dependencies generated?","text":"
pip freeze > requirements.txt\n
"},{"location":"python/faq/#access-environment-variable","title":"Access environment variable","text":"

Define environment variables in a .env file, use os package:

import os\n\nAWS_ACCESS_KEY_ID=os.environ.get(\"AWS_ACCESS_KEY_ID\")\nAWS_SECRET_ACCESS_KEY=os.environ.get(\"AWS_SECRET_ACCESS_KEY\")\n
"},{"location":"python/faq/#list-content-of-folder","title":"List content of folder","text":"
import glob\n\ndef listOfYaml():\n    return glob.glob(\"./*.yaml\")\n
"},{"location":"python/faq/#change-content-of-yaml","title":"Change content of yaml","text":"
import glob\nimport yaml\n\ndef listOfYaml():\n    return glob.glob(\"./*.yaml\")\n\ndef processYamlFile(f):\n    with open(f) as aYaml:\n        listDoc = yaml.safe_load(aYaml)\n    print(listDoc)\n    listDoc[\"metadata\"][\"namespace\"]='std-2'\n    print(listDoc)\n\n\nf = listOfYaml()\nprocessYamlFile(f[0])\n
"},{"location":"python/faq/#how-to-sort-unit-tests","title":"How to sort unit tests?","text":"

Use TestSuite and TestRunner. See TestPerceptron.py for usage.

import unittest\n\nclass TestPerceptron(unittest.TestCase):\n  # ....\ndef suite():\n    suite = unittest.TestSuite()\n    suite.addTest(TestPerceptron('testLoadIrisData'))\n    suite.addTest(TestPerceptron('testPlotIrisData'))\n\nif __name__ == \"__main__\":\n    runner = unittest.TextTestRunner(failfast=True)\n    runner.run(suite())\n
"},{"location":"python/faq/#how-to-traverse-a-directory-hierarchy","title":"How to traverse a directory hierarchy?","text":"
import os\nfor root, dirs, files in os.walk(\"/mydir\"):\n  for file in files:\n    if file.endswith(\".txt\"):\n      print(os.path.join(root, file))\n
"},{"location":"python/faq/#how-to-select-a-random-item-from-a-list","title":"How to select a random item from a list?","text":"
import random\nmove=random.choice(possibleMoves)\n
"},{"location":"python/faq/#logger","title":"Logger","text":"
import logging\n

Start python with the --log=INFO to set the logging level.

"},{"location":"python/faq/#reading-files","title":"Reading Files","text":""},{"location":"python/faq/#read-json-file","title":"Read json file","text":"
g = open('critics.json','r')\nd = json.load(g)\n
"},{"location":"python/faq/#read-csv-file","title":"Read csv file","text":"
f = open('fn.csv','r')\nfor line in f:\n  record = line.split(',')\n\n# or with unicode:\n  changedLine=u''.join(line).encode('utf-8').strip()\n
"},{"location":"python/faq/#read-file-with-specific-encoding","title":"Read file with specific encoding","text":"
 with open('../data/movielens/u.item',  encoding='ISO-8859-1') as f:\n
"},{"location":"python/faq/#skip-the-first-row-of-a-file","title":"Skip the first row of a file","text":"
f = open('fn.csv','r')\nf.readline()\nfor line in f:\n
"},{"location":"python/faq/#how-to-get-execution-time","title":"How to get execution time","text":"
import time\nstart = time.perf_counter()\n# potentially slow computation\nend = time.perf_counter() - start\n
"},{"location":"python/faq/#example-of-memory-consumption-for-object","title":"Example of memory consumption for object","text":"
import sys\n\na = 3\nb = 3.123\nc = [a, b]\nd = []\nfor obj in [a, b, c, d]:\n  print(obj, sys.getsizeof(obj))\n
"},{"location":"python/faq/#using-cloudevent","title":"Using CloudEvent","text":"
attributes = {\n      \"type\": \"com.anycompany.bdcp.user\",\n      \"source\": \"https://anycompany.com/user-mgr\",\n}\ndata = { \"eventType\": \"UserLogin\", \"username\": \"bob.the.builder@superemail.com\"}\nevent = CloudEvent(attributes, data)\nprint(event)\n
"},{"location":"python/faq/#what-is-zip","title":"What is zip?","text":"

Returns an iterator of tuples, where the i-th tuple contains the i-th element from each of the argument sequences or iterable:

dataset\n[[1, 20, 0], [2, 21, 1], [3, 22, 0]]\nfor a in zip(*dataset): print(a)\n(1, 2, 3)\n(20, 21, 22)\n(0, 1, 0)\n
"},{"location":"python/faq/#how-to-use-some-math","title":"How to use some math","text":"
# modulo\n8 % 2\n0\n#\n
"},{"location":"python/faq/#what-is-a-package","title":"What is a package?","text":"

A package is nothing more than a folder, which must contain a special file, __init__.py. (not needed anymore with python 3.3+)

"},{"location":"python/faq/#what-are-namespace-and-scope","title":"What are namespace and scope?","text":"

A namespace is a mapping from names to objects. They are the built-in names, the global names in a module, and the local names in a function. A scope is a textual region of a Python program, where a namespace is directly accessible. There are four different scopes that Python makes accessible:

  • The local scope, which is the innermost one and contains the local names.
  • The enclosing scope, that is, the scope of any enclosing function. It contains non-local names and also non-global names.
  • The global scope contains the global names.
  • The built-in scope contains the built-in names.
"},{"location":"python/faq/#customize-matplotlib-graph","title":"customize matplotlib graph","text":"
    graph.set_title(\"Results of 500 slot machine pulls\")\n    # Make the y-axis begin at 0\n    graph.set_ylim(bottom=0)\n    # Label the y-axis\n    graph.set_ylabel(\"Balance\")\n    # Bonus: format the numbers on the y-axis as dollar amounts\n    # An array of the values displayed on the y-axis (150, 175, 200, etc.)\n    ticks = graph.get_yticks()\n    # Format those values into strings beginning with dollar sign\n    new_labels = ['${}'.format(int(amt)) for amt in ticks]\n    # Set the new labels\n    graph.set_yticklabels(new_labels)\n
"},{"location":"python/python-summary/","title":"Python Summary","text":"

See this good tutorial from Programiz

Python is an interpreted Object Oriented & functional language. It organizes the code in modules. Use blank to indent code block. The coding style is known as PEP8.

3.9 release Product documentation

"},{"location":"python/python-summary/#getting-started","title":"Getting started","text":"

Start a python interpreter: python3 and start entering python code, develop a progname.py file and use python3 progname.py, or add #!/usr/bin/env python3 to make it self runnable.

The code can be structured with function def name(): and then with our without a main part:

if __name__ == \"__main__\":\n

Better to use main statement when using objects and classes.

"},{"location":"python/python-summary/#concepts","title":"Concepts","text":""},{"location":"python/python-summary/#datatypes","title":"Datatypes","text":"
  • list:

    • concat lists: a = [1,2,3,4] then a = a + [5,6] or a + list(\"789\") -> [1,2,3,4,5,6,'7','8','9']. Lists are mutable.
    • len(a)
    • slicing: all elements except first and last: a[1:-1], all from index 3: a[3:]
    • list.append(a_record) modifies a list by adding an item to the end
    • list.pop() removes and returns the last element of a list
    • Get one object index using the list.index(object)
    • in list to assess if element in the list
    • list comprehensions:

    • squares = [n**2 for n in range(10)]

    • short_planets = [planet for planet in planets if len(planet) < 6]
  • dictionary is like json object, with key-value list. The main operations on a dictionary are storing a value with some key and extracting the value given the key. It is also possible to delete a key:value pair with del. If you store using a key that is already in use, the old value associated with that key is forgotten.

cols={}\ncols[\"column_name_1\"] = np.random.normal(2,1,10)\n
  • A list comprehension consists of brackets containing an expression followed by a for clause, then zero or more for or if clauses. The result will be a new list resulting from evaluating the expression in the context of the for and if clauses which follow it. squares = [x**2 for x in range(10)]
  • Queues: do not use list for queue but collections.deque
from collections import deque\nqueue = deque([23,56,78,44])\nqueue.append(55)\nprint(queue)\n\n> deque([23, 56, 78, 44, 55])\ntwentythree=queue.popleft()\n
"},{"location":"python/python-summary/#tuples","title":"Tuples","text":"

tuples: a = (1,2,3,4,5) are iterable. Tuple does not support item assignment: t[3] = 5 \u2192 error.

tup1 = ('physics', 'chemistry', 1997, 2000);\nprint (\"tup1[0]: \", tup1[0]);\n# iteration\nfor a in tup1:\n  print(a)\n

They are immutable. Need to create new tuples from existing one. Removing individual tuple elements is not possible.

Transform a tuple into array:

a=(2, 2.6496666666666666, -30.463416666666667)\nb=np.asarray(a)\n# b array([  2.        ,   2.64966667, -30.46341667])\n

"},{"location":"python/python-summary/#string","title":"String","text":"
# get the last four chars\ndirname[:len(dirname) - 4]\n# split string to get the last folder name of a path\nfolders = path.split('/') \nname= folders[len(folders)-1]\nname.lower()\nname.upper()\nname.index('substr')\nclaim.startswith(planet)\nclaim.endswith(planet)\n

See string tutorial

def word_search(documents, keyword):\n\"\"\"\n    Takes a list of documents (each document is a string) and a keyword. \n    Returns list of the index values into the original list for all documents \n    containing the keyword.\n\n    Example:\n    doc_list = [\"The Learn Python Challenge Casino.\", \"They bought a car\", \"Casinoville\"]\n    >>> word_search(doc_list, 'casino')\n    >>> [0]\n    \"\"\"\n    # list to hold the indices of matching documents\n    indices = [] \n    # Iterate through the indices (i) and elements (doc) of documents\n    for i, doc in enumerate(documents):\n        # Split the string doc into a list of words (according to whitespace)\n        tokens = doc.split()\n        # Make a transformed list where we 'normalize' each word to facilitate matching.\n        # Periods and commas are removed from the end of each word, and it's set to all lowercase.\n        normalized = [token.rstrip('.,').lower() for token in tokens]\n        # Is there a match? If so, update the list of matching indices.\n        if keyword.lower() in normalized:\n            indices.append(i)\n    return indices\n
"},{"location":"python/python-summary/#control-flow","title":"Control flow","text":"

if condition: elsif condition: else

  • For statement iterates over the items of any sequence (a list or a string), in the order that they appear in the sequence
  • Range() to iterate over a sequence of numbers. (e.g. for i in range(5):). The given end point is never part of the generated sequence. It is possible to let the range start at another number, or to specify a different increment (from 0 to 10, increment 3: range(0, 10, 3)). It returns an object which returns the successive items of the desired sequence when you iterate over it
  • list(range(5)) build a list like: [0, 1, 2, 3, 4]
  • Loop statements may have an else clause; it is executed when the loop terminates through exhaustion of the list (with for) or when the condition becomes false (with while), but not when the loop is terminated by a break statement
  • The pass statement does nothing. It can be used when a statement is required syntactically but the program requires no action.

  • See Control Flow Statement Tutorial

"},{"location":"python/python-summary/#exception","title":"Exception","text":"
try:\n  dosomething()\nexcept ValueError:\n  pass\n
"},{"location":"python/python-summary/#regular-expressions","title":"Regular Expressions","text":"

How to regex

Specialize in string pattern matching from string. It is a language by itself.

import re\np = re.compile('ab*')\n
Char Note ^ Matches the beginning of a line $ Matches the end of the line . Matches any character \\s Matches whitespace \\S Matches any non-whitespace character * Repeats a character zero or more times *? Repeats a character zero or more times (non-greedy) + Repeats a character one or more times +? Repeats a character one or more times (non-greedy) [aeiou] Matches a single character in the listed set [^XYZ] Matches a single character not in the listed set [a-z0-9] The set of characters can include a range ( Indicates where string extraction is to start ) Indicates where string extraction is to end \u2018@([^ ]*)' extract the domain name from the email. Use () to specify what to extract, from the @. [^ ] math non-blank character re.findall(\u2018[0-9]+\u2019,s) find all occurrence of number in string. [0-9] is one digit"},{"location":"python/python-summary/#functions","title":"Functions","text":"

Python supports OOD and functional programming like Scala. Function can be defined in a scope of a module file outside of a class, or as method of a class.

  • The keyword def introduces a function definition. It must be followed by the function name and the parenthesized list of formal parameters. The statements that form the body of the function start at the next line, and must be indented.
  • The first statement of the function body can optionally be a string literal used as docstring.
  • local variables cannot be directly assigned a value within a function (unless named in a global statement), although they may be referenced
  • The return statement returns with a value from a function. return without an expression argument returns None.
  • functions can have a variable number of arguments def ask_ok(prompt, retries=4, complaint='Yes or no, please!'):
  • Functions can also be called using keyword arguments of the form kwarg=value instead of using the positional arguments. keyword arguments must follow positional arguments. Arguments could have default value so becomes optional. Attention the default value is evaluated only once. This makes a difference when the default is a mutable object such as a list, dictionary, or instances of most classes. For example, the following function accumulates the arguments passed to it on subsequent calls
def f(a, L=[]):\n    L.append(a)\n    return L\n\nprint(f(1))\n[1]\nprint(f(2))\n[1,2]\nprint(f(3))\n[1,2,3]\n
  • A function can be called with an arbitrary number of arguments. The syntax is def (formalArg,formalArg2,*args,kwarg=value):

  • lambda is a keyword to define an anonymous function

def make_incrementor(n):\n      return lambda x: x + n\nf = make_incrementor(42)\nf(2)\n44\n
This can be used for specifying a sort method anonymously: use the second element of a tuple to sort a list of tuples

collection.sort(key=lambda collection : collection[1])\n

variable scope in function: when there is a need to access variable defined at the module level use the keyword global

"},{"location":"python/python-summary/#namespaces","title":"Namespaces","text":"

A namespace is a mapping from names to objects. Examples of namespaces are:

  • the set of built-in names (containing functions such as abs(), and built-in exception names), loaded when interpreter starts
  • the global names in a module, created when module is loaded, and kept until interpreter quits
  • the local names in a function invocation, created when the function is called, deleted when the function returns or raises an exception. When searching for a reference, the interpreter starts with the innermost scope (current block), then enclosing functions, modules and built-in names. It is important to realize that scopes are determined textually: the global scope of a function defined in a module is that module\u2019s namespace, no matter from where or by what alias the function is called. If no global statement is in effect \u2013 assignments to names always go into the innermost scope. Assignments do not copy data \u2014 they just bind names to objects.
# reference a non local variable in a function\n     nonlocal spam\n# or a global\n    global spam\n
"},{"location":"python/python-summary/#object","title":"Object","text":"

Python is a OOP, with polymorphism and inheritance.

A class constructor is a method declared as def __init__(self):. A destructor is def __del__(self):. The equivalent of toString is def __str__(self):.

Class definitions, like function definitions (def statements) must be executed before they have any effect. When a class definition is entered, a new namespace is created, and used as the local scope \u2014 thus, all assignments to local variables go into this new 'class' namespace.

class Complex:\n\"\"\" Represents mathematical Complex number\n    \"\"\"\n    # called once instance is created to initialize internal attributes (like java constructor)\n    def __init__(self, realpart, imagpart):\n        self.r = realpart\n        self.i = imagpart\n\n\n# creating an instance\nx=Complex(3,2)\n# attribute reference ; class.attname\nx.i\n

The only operations understood by instance objects are attribute references. There are two kinds of valid attribute names, data attributes and methods.

class MyClass(object):\n'''\n    classdocs\n    '''\n\n    def __init__(self, p:str):\n'''\n        Constructor\n        '''\n        self.a=p\n        self.n=1\n    def f(self):\n        return self.n+1\n\nc=MyClass('an object')\n# can add attribute dynamically into object even if class did not define it...\nc.b=\"a new attribute\"\nprint(c.a)\n# an object\nprint(c.b)\n# a new attribute\nprint(c.f())\n# 2\n

Clients should use data attributes with care \u2014 clients may mess up invariants maintained by the methods by stamping on their data attributes. Python supports inheritance and search for attributes is done using a depth-first, left to right approach.

class DerivedClassName(Base1, Base2, Base3):\n

There are no private instance variables inside an object. By naming convention, a name prefixed with _ should be treated as a non-public part of the API.

"},{"location":"python/python-summary/#module","title":"Module","text":"

A module is a file containing python definitions and statements. The filename = module name. Definitions from a module can be imported into other modules or into the main module. Be sure to take the folder hierarchy as package hierarchy. A module can contain executable statements as well as function definitions. These statements are intended to initialize the module. They are executed only the first time the module name is encountered in an import statement.

# To see the variables and function of a module\nimport math\nprint(dir(math))\n# give combined documentation for all the functions and values in the module \nhelp(math)\n
Always import only the specific things we'll need from each module.

To make a module executable we need a main statement

if __name__ == \"__main__\":\n

The directory containing the script being run is placed at the beginning of the search path, ahead of the standard library path: scripts in that directory will be loaded instead of modules of the same name in the library directory. The interpreter first searches for a built-in module, then it searches for a file named spam.py in a list of directories given by the variable sys.path (current directory + PYTHONPATH). To speed up loading modules, Python caches the compiled version of each module in the __pycache__ directory under the name module.version.pyc, where the version encodes the format of the compiled file; it generally contains the Python version number.

"},{"location":"python/python-summary/#file-io","title":"File I/O","text":"

You open a file in different modes with the open() function. Files are either text or binary.

# first create a text file\nf = open('atext .txt','w' )\nf.write('A first line\\n')\nf.write('A second line\\n')\nf.close()\n\nf = open('atext.txt', 'r')\nf.readline()\n# 'A first line\\n'\nf.readline()\n# 'A second line\\n'\nf.readline()\n# no more line is '' empty string\n\n# read all lines and build a list: 2 ways\nlines=f.readlines()\nlist(f)\n# read line by line: very efficient as use limited memory. f is an iterator over the lines\nfor line in f\n\n# a dictionary persisted as json in text file\nimport json\nf = open('critics.txt', 'w')\njson.dump(critics,f)\nf.close()\n# reload it\ng = open( 'critics.txt','r' )\nd=json.load(g)\nprint(d[' Toby'])\n

Python doesn't flush the buffer\u2014that is, write data to the file\u2014until it's sure you're done writing. One way to do this is to close the file. File objects contain a special pair of built-in methods: __enter__() and __exit__().

See code python-bible/readAssetFromFolder.py which uses Git client to get origin URL.

"},{"location":"python/python-summary/#date","title":"Date","text":"

See the datetime module

import datetime\n\nprint ('Current date/time: {}'.format(datetime.datetime.now()))\n\n d= datetime.date(2018,9,23)\n d= datetime.date.today()\n datetime.datetime.today()\n\ndatetime.datetime(2019, 9, 23, 18, 34, 26, 856722)\n\n\ndate_time_str = 'Jun 28 2018  7:40AM'\ndate_time_obj = datetime.datetime.strptime(date_time_str, '%b %d %Y %I:%M%p')\n\n# transform to a string\nd.strftime(\"%Y-%m-%d %H:%M:%S\")\n
"},{"location":"python/python-summary/#unit-testing","title":"Unit testing","text":"

unittest is based on Kent Beck's work on unit testing, like the junit library.

  • define a module with a class which extends TestCase, use the setUp and tearDown methods to set context before each test method.

  • Add test method and use assert* to validate test results.

Consider pytest as another modern tool to do testing in python.

"},{"location":"python/python-summary/#reading-command-line-arguments","title":"Reading command line arguments","text":"
import sys\nprint(\"This is the name of the script: \", sys.argv[0])\nprint(\"Number of arguments: \", len(sys.argv))\nprint(\"The arguments are: \" , str(sys.argv))\n
"},{"location":"python/python-summary/#doing-http-requests","title":"Doing HTTP requests","text":"

See code under web_data, but with python 3 the approach is to use request.

  • urllib
  • The request library
"},{"location":"python/python-summary/#python-flask-webapp","title":"Python Flask WebApp","text":"

The project python-code includes the angular-flask folder to present some simple examples of how to use Flask with Angular.

See this note for details.

"},{"location":"python/python-summary/#data-management","title":"Data management","text":""},{"location":"python/python-summary/#pandas","title":"Pandas","text":"

Create a data frame with two columns

data = DataFrame({'message': [], 'class': []})\n

Create n records with timestamp from one start time:

start_time = datetime.datetime.today() \nc=pd.date_range(start_time, periods=nb_records, freq=METRIC_FREQUENCY)\n

Transforming to string

c.strftime(\"%Y-%m-%d %H:%M:%S\")\n

"},{"location":"python/python-summary/#split-data-into-training-and-test-sets","title":"Split data into training and test sets","text":"
splitIndex = np.random.rand(len(data)) < 0.8\ntrain = data[splitIndex]\ntest = data [~splitIndex]\n
"},{"location":"webcrawling/readme/","title":"Web crawling examples","text":""},{"location":"webcrawling/readme/#requests","title":"Requests","text":"

Requests is one modern library to do http calls.

The Response object includes the return code and text, or can be transformed using the json() method.

"},{"location":"webcrawling/readme/#using-python-multiple-threading-for-assessing-the-most-popular-image-on-imgurcom","title":"Using python multiple threading for assessing the most popular image on imgur.com","text":"

This is the implementation from this article from MARCUS MCCURDY

The app was registered in imgur using this link. The client id is set in environment variable:

IMGUR_CLIENT_ID 85b0a015a03ea5798f2572bd6c47b6bd935ec090a10b7d4c1a31378\n

The code is under web_data/imgur folder. The module single.py is loading one image at a time. DownloadWorker.py implements a class which uses Thread to run the download in parallel.

The script creates 8 threads and a queue to get the links from where to download the image. The run method has been overridden, which runs an infinite loop. On every iteration, it calls self.queue.get() to try and fetch a URL from a thread-safe queue. It blocks until there is an item in the queue for the worker to process.

Once the worker receives an item from the queue, it then calls the same download_link() function, when the download is finished, the worker signals the queue that the task is done. This is very important, because the Queue keeps track of how many tasks were enqueued. The call to queue.join() would block the main thread forever if the workers did not signal that they completed a task.

"}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml new file mode 100644 index 0000000..e755427 --- /dev/null +++ b/sitemap.xml @@ -0,0 +1,58 @@ + + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + + None + 2023-07-28 + daily + + \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz new file mode 100644 index 0000000..290a13a Binary files /dev/null and b/sitemap.xml.gz differ diff --git a/webcrawling/readme/index.html b/webcrawling/readme/index.html new file mode 100644 index 0000000..faddc04 --- /dev/null +++ b/webcrawling/readme/index.html @@ -0,0 +1,2 @@ + Web Data - Python code studies

Web crawling examples

Requests

Requests is one modern library to do http calls.

The Response object includes the return code and text, or can be transformed using the json() method.

This is the implementation from this article from MARCUS MCCURDY

The app was registered in imgur using this link. The client id is set in environment variable:

IMGUR_CLIENT_ID 85b0a015a03ea5798f2572bd6c47b6bd935ec090a10b7d4c1a31378
+

The code is under web_data/imgur folder. The module single.py is loading one image at a time. DownloadWorker.py implements a class which uses Thread to run the download in parallel.

The script creates 8 threads and a queue to get the links from where to download the image. The run method has been overridden, which runs an infinite loop. On every iteration, it calls self.queue.get() to try and fetch a URL from a thread-safe queue. It blocks until there is an item in the queue for the worker to process.

Once the worker receives an item from the queue, it then calls the same download_link() function, when the download is finished, the worker signals the queue that the task is done. This is very important, because the Queue keeps track of how many tasks were enqueued. The call to queue.join() would block the main thread forever if the workers did not signal that they completed a task.

\ No newline at end of file